[llvm] r205090 - ARM64: initial backend import

Alexey Samsonov samsonov at google.com
Mon Mar 31 01:07:51 PDT 2014


On Mon, Mar 31, 2014 at 11:29 AM, Evgeniy Stepanov <eugeni.stepanov at gmail.com> wrote:

> Hi,
>
> MemorySanitizer says that ARM64 MCDisassembler::getInstruction can
> leave Size uninitialized:
>

I've tried to fix this in r205171. Tim, can you verify that the check is
correct? Judging by the documentation, ARM64Disassembler::getInstruction
should never leave "Size" uninitialized:

  /// getInstruction  - Returns the disassembly of a single instruction.
  ///
<...>
  /// @param size     - A value to populate with the size of the instruction,
  ///                   or the number of bytes consumed while attempting to
  ///                   decode an invalid instruction.
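
For reference, this is roughly how the caller that MSan flagged consumes
"Size" (a simplified sketch of the PrintInsts loop in
tools/llvm-mc/Disassembler.cpp; names and signatures are approximate, not
copied from the tree). It shows why the value must be written even on a
failed decode, since the loop reads it unconditionally to skip the bad bytes:

  uint64_t Index = 0;
  while (Index < Bytes.size()) {
    MCInst Inst;
    uint64_t Size;
    MCDisassembler::DecodeStatus S =
        DisAsm.getInstruction(Inst, Size, MemoryObj, Index, nulls(), nulls());
    if (S == MCDisassembler::Fail)
      SM.PrintMessage(SMLoc(), SourceMgr::DK_Warning,
                      "invalid instruction encoding");
    Index += Size;  // <-- MSan reports this read if Size was never written
  }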


>
> bin/llvm-mc -triple arm64-apple-darwin -disassemble <
> ../llvm/test/MC/Disassembler/ARM64/invalid-logical.txt
>         .section        __TEXT,__text,regular,pure_instructions
> <stdin>:4:1: warning: invalid instruction encoding
> 0x7b 0xbf 0x25 0x72
> ^
> ==15799== WARNING: MemorySanitizer: use-of-uninitialized-value
>     #0 0x7f349610bc1f in PrintInsts tools/llvm-mc/Disassembler.cpp:77
>     #1 0x7f349610bc1f in llvm::Disassembler::disassemble(llvm::Target const&, std::string const&, llvm::MCSubtargetInfo&, llvm::MCStreamer&, llvm::MemoryBuffer&, llvm::SourceMgr&, llvm::raw_ostream&) tools/llvm-mc/Disassembler.cpp:204
>     #2 0x7f34960ffff9 in main tools/llvm-mc/llvm-mc.cpp:485
>     #3 0x7f3494886ed4 in __libc_start_main
>     #4 0x7f34960f832f in _start
>
> Detected here (unfortunately there is no report in the bot logs
> because the test redirects all output to a file; you can only see MSan
> default error code 77):
>
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2829/steps/check-llvm%20msan/logs/stdio
>
>
> On Sat, Mar 29, 2014 at 2:18 PM, Tim Northover <tnorthover at apple.com>
> wrote:
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,563 @@
> > +//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file implements the ARM64MCCodeEmitter class.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#define DEBUG_TYPE "mccodeemitter"
> > +#include "MCTargetDesc/ARM64AddressingModes.h"
> > +#include "MCTargetDesc/ARM64BaseInfo.h"
> > +#include "MCTargetDesc/ARM64FixupKinds.h"
> > +#include "MCTargetDesc/ARM64MCExpr.h"
> > +#include "llvm/MC/MCCodeEmitter.h"
> > +#include "llvm/MC/MCContext.h"
> > +#include "llvm/MC/MCInst.h"
> > +#include "llvm/MC/MCInstrInfo.h"
> > +#include "llvm/MC/MCRegisterInfo.h"
> > +#include "llvm/MC/MCSubtargetInfo.h"
> > +#include "llvm/ADT/Statistic.h"
> > +#include "llvm/Support/raw_ostream.h"
> > +using namespace llvm;
> > +
> > +STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
> > +STATISTIC(MCNumFixups, "Number of MC fixups created.");
> > +
> > +namespace {
> > +
> > +class ARM64MCCodeEmitter : public MCCodeEmitter {
> > +  MCContext &Ctx;
> > +
> > +  ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
> > +  void operator=(const ARM64MCCodeEmitter &);     // DO NOT IMPLEMENT
> > +public:
> > +  ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo
> &sti,
> > +                     MCContext &ctx)
> > +      : Ctx(ctx) {}
> > +
> > +  ~ARM64MCCodeEmitter() {}
> > +
> > +  // getBinaryCodeForInstr - TableGen'erated function for getting the
> > +  // binary encoding for an instruction.
> > +  uint64_t getBinaryCodeForInstr(const MCInst &MI,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +
> > +  /// getMachineOpValue - Return binary encoding of operand. If the
> machine
> > +  /// operand requires relocation, record the relocation and return
> zero.
> > +  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
> > +                             SmallVectorImpl<MCFixup> &Fixups,
> > +                             const MCSubtargetInfo &STI) const;
> > +
> > +  /// getAMIndexed8OpValue - Return encoding info for base register
> > +  /// and 12-bit unsigned immediate attached to a load, store or prfm
> > +  /// instruction. If operand requires a relocation, record it and
> > +  /// return zero in that part of the encoding.
> > +  template <uint32_t FixupKind>
> > +  uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                SmallVectorImpl<MCFixup> &Fixups,
> > +                                const MCSubtargetInfo &STI) const;
> > +
> > +  /// getAdrLabelOpValue - Return encoding info for 21-bit immediate
> ADR label
> > +  /// target.
> > +  uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
> > +                              SmallVectorImpl<MCFixup> &Fixups,
> > +                              const MCSubtargetInfo &STI) const;
> > +
> > +  /// getAddSubImmOpValue - Return encoding for the 12-bit immediate
> value and
> > +  /// the 2-bit shift field.
> > +  uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
> > +                               SmallVectorImpl<MCFixup> &Fixups,
> > +                               const MCSubtargetInfo &STI) const;
> > +
> > +  /// getCondBranchTargetOpValue - Return the encoded value for a
> conditional
> > +  /// branch target.
> > +  uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                      SmallVectorImpl<MCFixup> &Fixups,
> > +                                      const MCSubtargetInfo &STI) const;
> > +
> > +  /// getTestBranchTargetOpValue - Return the encoded value for a
> test-bit-and-
> > +  /// branch target.
> > +  uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                      SmallVectorImpl<MCFixup> &Fixups,
> > +                                      const MCSubtargetInfo &STI) const;
> > +
> > +  /// getBranchTargetOpValue - Return the encoded value for an
> unconditional
> > +  /// branch target.
> > +  uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                  SmallVectorImpl<MCFixup> &Fixups,
> > +                                  const MCSubtargetInfo &STI) const;
> > +
> > +  /// getMoveWideImmOpValue - Return the encoded value for the
> immediate operand
> > +  /// of a MOVZ or MOVK instruction.
> > +  uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +
> > +  /// getVecShifterOpValue - Return the encoded value for the vector
> shifter.
> > +  uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                SmallVectorImpl<MCFixup> &Fixups,
> > +                                const MCSubtargetInfo &STI) const;
> > +
> > +  /// getMoveVecShifterOpValue - Return the encoded value for the
> vector move
> > +  /// shifter (MSL).
> > +  uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                    SmallVectorImpl<MCFixup> &Fixups,
> > +                                    const MCSubtargetInfo &STI) const;
> > +
> > +  /// getFixedPointScaleOpValue - Return the encoded value for the
> > +  // FP-to-fixed-point scale factor.
> > +  uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                     SmallVectorImpl<MCFixup> &Fixups,
> > +                                     const MCSubtargetInfo &STI) const;
> > +
> > +  uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                SmallVectorImpl<MCFixup> &Fixups,
> > +                                const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +  uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                SmallVectorImpl<MCFixup> &Fixups,
> > +                                const MCSubtargetInfo &STI) const;
> > +
> > +  /// getSIMDShift64OpValue - Return the encoded value for the
> > +  // shift-by-immediate AdvSIMD instructions.
> > +  uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +
> > +  uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                    SmallVectorImpl<MCFixup> &Fixups,
> > +                                    const MCSubtargetInfo &STI) const;
> > +
> > +  uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +
> > +  uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
> > +                                 SmallVectorImpl<MCFixup> &Fixups,
> > +                                 const MCSubtargetInfo &STI) const;
> > +
> > +  unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
> > +                   const MCSubtargetInfo &STI) const;
> > +
> > +  void EmitByte(unsigned char C, raw_ostream &OS) const { OS <<
> (char)C; }
> > +
> > +  void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const
> {
> > +    // Output the constant in little endian byte order.
> > +    for (unsigned i = 0; i != Size; ++i) {
> > +      EmitByte(Val & 255, OS);
> > +      Val >>= 8;
> > +    }
> > +  }
> > +
> > +  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> > +                         SmallVectorImpl<MCFixup> &Fixups,
> > +                         const MCSubtargetInfo &STI) const;
> > +};
> > +
> > +} // end anonymous namespace
> > +
> > +MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII,
> > +                                              const MCRegisterInfo &MRI,
> > +                                              const MCSubtargetInfo
> &STI,
> > +                                              MCContext &Ctx) {
> > +  return new ARM64MCCodeEmitter(MCII, STI, Ctx);
> > +}
> > +
> > +/// getMachineOpValue - Return binary encoding of operand. If the
> machine
> > +/// operand requires relocation, record the relocation and return zero.
> > +unsigned
> > +ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand
> &MO,
> > +                                      SmallVectorImpl<MCFixup> &Fixups,
> > +                                      const MCSubtargetInfo &STI) const
> {
> > +  if (MO.isReg())
> > +    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
> > +  else {
> > +    assert(MO.isImm() && "did not expect relocated expression");
> > +    return static_cast<unsigned>(MO.getImm());
> > +  }
> > +
> > +  assert(0 && "Unable to encode MCOperand!");
> > +  return 0;
> > +}
> > +
> > +template <uint32_t FixupKind>
> > +uint32_t
> > +ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                         SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                         const MCSubtargetInfo &STI)
> const {
> > +  unsigned BaseReg = MI.getOperand(OpIdx).getReg();
> > +  BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg);
> > +
> > +  const MCOperand &MO = MI.getOperand(OpIdx + 1);
> > +  uint32_t ImmVal = 0;
> > +
> > +  if (MO.isImm())
> > +    ImmVal = static_cast<uint32_t>(MO.getImm());
> > +  else {
> > +    assert(MO.isExpr() && "unable to encode load/store imm operand");
> > +    MCFixupKind Kind = MCFixupKind(FixupKind);
> > +    Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind,
> MI.getLoc()));
> > +    ++MCNumFixups;
> > +  }
> > +
> > +  return BaseReg | (ImmVal << 5);
> > +}
> > +
> > +/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR
> label
> > +/// target.
> > +uint32_t
> > +ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
> > +                                       SmallVectorImpl<MCFixup> &Fixups,
> > +                                       const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +
> > +  // If the destination is an immediate, we have nothing to do.
> > +  if (MO.isImm())
> > +    return MO.getImm();
> > +  assert(MO.isExpr() && "Unexpected ADR target type!");
> > +  const MCExpr *Expr = MO.getExpr();
> > +
> > +  MCFixupKind Kind = MI.getOpcode() == ARM64::ADR
> > +                         ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21)
> > +                         : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21);
> > +  Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
> > +
> > +  MCNumFixups += 1;
> > +
> > +  // All of the information is in the fixup.
> > +  return 0;
> > +}
> > +
> > +/// getAddSubImmOpValue - Return encoding for the 12-bit immediate
> value and
> > +/// the 2-bit shift field.  The shift field is stored in bits 13-14 of
> the
> > +/// return value.
> > +uint32_t
> > +ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                        SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                        const MCSubtargetInfo &STI)
> const {
> > +  // Suboperands are [imm, shifter].
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
> > +  assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL &&
> > +         "unexpected shift type for add/sub immediate");
> > +  unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm());
> > +  assert((ShiftVal == 0 || ShiftVal == 12) &&
> > +         "unexpected shift value for add/sub immediate");
> > +  if (MO.isImm())
> > +    return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
> > +  assert(MO.isExpr() && "Unable to encode MCOperand!");
> > +  const MCExpr *Expr = MO.getExpr();
> > +  assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with
> fixup");
> > +
> > +  // Encode the 12 bits of the fixup.
> > +  MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12);
> > +  Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
> > +
> > +  ++MCNumFixups;
> > +
> > +  return 0;
> > +}
> > +
> > +/// getCondBranchTargetOpValue - Return the encoded value for a
> conditional
> > +/// branch target.
> > +uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue(
> > +    const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
> > +    const MCSubtargetInfo &STI) const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +
> > +  // If the destination is an immediate, we have nothing to do.
> > +  if (MO.isImm())
> > +    return MO.getImm();
> > +  assert(MO.isExpr() && "Unexpected target type!");
> > +
> > +  MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19);
> > +  Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
> > +
> > +  ++MCNumFixups;
> > +
> > +  // All of the information is in the fixup.
> > +  return 0;
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +
> > +  if (MO.isImm())
> > +    return MO.getImm();
> > +  assert(MO.isExpr() && "Unexpected movz/movk immediate");
> > +
> > +  Fixups.push_back(MCFixup::Create(
> > +      0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw),
> MI.getLoc()));
> > +
> > +  ++MCNumFixups;
> > +
> > +  return 0;
> > +}
> > +
> > +/// getTestBranchTargetOpValue - Return the encoded value for a
> test-bit-and-
> > +/// branch target.
> > +uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue(
> > +    const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
> > +    const MCSubtargetInfo &STI) const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +
> > +  // If the destination is an immediate, we have nothing to do.
> > +  if (MO.isImm())
> > +    return MO.getImm();
> > +  assert(MO.isExpr() && "Unexpected ADR target type!");
> > +
> > +  MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14);
> > +  Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
> > +
> > +  ++MCNumFixups;
> > +
> > +  // All of the information is in the fixup.
> > +  return 0;
> > +}
> > +
> > +/// getBranchTargetOpValue - Return the encoded value for an
> unconditional
> > +/// branch target.
> > +uint32_t
> > +ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                           SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                           const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +
> > +  // If the destination is an immediate, we have nothing to do.
> > +  if (MO.isImm())
> > +    return MO.getImm();
> > +  assert(MO.isExpr() && "Unexpected ADR target type!");
> > +
> > +  MCFixupKind Kind = MI.getOpcode() == ARM64::BL
> > +                         ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26)
> > +                         : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26);
> > +  Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
> > +
> > +  ++MCNumFixups;
> > +
> > +  // All of the information is in the fixup.
> > +  return 0;
> > +}
> > +
> > +/// getVecShifterOpValue - Return the encoded value for the vector
> shifter:
> > +///
> > +///   00 -> 0
> > +///   01 -> 8
> > +///   10 -> 16
> > +///   11 -> 24
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                         SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                         const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the shift
> amount!");
> > +
> > +  switch (MO.getImm()) {
> > +  default:
> > +    break;
> > +  case 0:
> > +    return 0;
> > +  case 8:
> > +    return 1;
> > +  case 16:
> > +    return 2;
> > +  case 24:
> > +    return 3;
> > +  }
> > +
> > +  assert(false && "Invalid value for vector shift amount!");
> > +  return 0;
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the shift
> amount!");
> > +  return 64 - (MO.getImm());
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                             SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                             const MCSubtargetInfo
> &STI) const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the shift
> amount!");
> > +  return 64 - (MO.getImm() | 32);
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the shift
> amount!");
> > +  return 32 - (MO.getImm() | 16);
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the shift
> amount!");
> > +  return 16 - (MO.getImm() | 8);
> > +}
> > +
> > +/// getFixedPointScaleOpValue - Return the encoded value for the
> > +// FP-to-fixed-point scale factor.
> > +uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue(
> > +    const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
> > +    const MCSubtargetInfo &STI) const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return 64 - MO.getImm();
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return 64 - MO.getImm();
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return 32 - MO.getImm();
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return 16 - MO.getImm();
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                         SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                         const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return 8 - MO.getImm();
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return MO.getImm() - 64;
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return MO.getImm() - 32;
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                          SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                          const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return MO.getImm() - 16;
> > +}
> > +
> > +uint32_t
> > +ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                         SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                         const MCSubtargetInfo &STI)
> const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() && "Expected an immediate value for the scale
> amount!");
> > +  return MO.getImm() - 8;
> > +}
> > +
> > +/// getMoveVecShifterOpValue - Return the encoded value for the vector
> move
> > +/// shifter (MSL).
> > +uint32_t
> > +ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned
> OpIdx,
> > +                                             SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                             const MCSubtargetInfo
> &STI) const {
> > +  const MCOperand &MO = MI.getOperand(OpIdx);
> > +  assert(MO.isImm() &&
> > +         "Expected an immediate value for the move shift amount!");
> > +  unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm());
> > +  assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!");
> > +  return ShiftVal == 8 ? 0 : 1;
> > +}
> > +
> > +unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned
> EncodedValue,
> > +                                     const MCSubtargetInfo &STI) const {
> > +  // If one of the signed fixup kinds is applied to a MOVZ instruction,
> the
> > +  // eventual result could be either a MOVZ or a MOVN. It's the
> MCCodeEmitter's
> > +  // job to ensure that any bits possibly affected by this are 0. This
> means we
> > +  // must zero out bit 30 (essentially emitting a MOVN).
> > +  MCOperand UImm16MO = MI.getOperand(1);
> > +
> > +  // Nothing to do if there's no fixup.
> > +  if (UImm16MO.isImm())
> > +    return EncodedValue;
> > +
> > +  return EncodedValue & ~(1u << 30);
> > +}
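
(Illustrative aside, not part of the patch: in the A64 move-wide encoding the
two-bit opc field lives in bits 30:29, with MOVN = 00 and MOVZ = 10, so
clearing bit 30 is exactly what turns an encoded MOVZ skeleton into a MOVN
one, leaving any bits the later fixup might rewrite as zero. For example:

  uint32_t MovzX0 = 0xD2800000;            // movz x0, #0   (opc = 10)
  uint32_t MovnX0 = MovzX0 & ~(1u << 30);  // 0x92800000 == movn x0, #0
)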
> > +
> > +void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI,
> raw_ostream &OS,
> > +                                           SmallVectorImpl<MCFixup>
> &Fixups,
> > +                                           const MCSubtargetInfo &STI)
> const {
> > +  if (MI.getOpcode() == ARM64::TLSDESCCALL) {
> > +    // This is a directive which applies an R_AARCH64_TLSDESC_CALL to
> the
> > +    // following (BLR) instruction. It doesn't emit any code itself so
> it
> > +    // doesn't go through the normal TableGenerated channels.
> > +    MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call);
> > +    Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(),
> Fixup));
> > +    return;
> > +  }
> > +
> > +  uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
> > +  EmitConstant(Binary, 4, OS);
> > +  ++MCNumEmitted; // Keep track of the # of mi's emitted.
> > +}
> > +
> > +#include "ARM64GenMCCodeEmitter.inc"
> >
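A quick worked example of the fixup flow above (my own sketch, not taken from
the patch): for "bl _externalFunc", getBranchTargetOpValue() sees an MCExpr
operand, records fixup_arm64_pcrel_call26 and returns 0, so
EncodeInstruction() emits the BL with an all-zero imm26 field:

  0x94000000   // BL with imm26 == 0; bits 31:26 = 0b100101

Once the target is resolved, the assembler or linker patches the low 26 bits
with (TargetAddress - InstructionAddress) >> 2.
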
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,168 @@
> > +//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file contains the implementation of the assembly expression modifiers
> > +// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#define DEBUG_TYPE "aarch64symbolrefexpr"
> > +#include "ARM64MCExpr.h"
> > +#include "llvm/MC/MCAssembler.h"
> > +#include "llvm/MC/MCContext.h"
> > +#include "llvm/MC/MCELF.h"
> > +#include "llvm/MC/MCSymbol.h"
> > +#include "llvm/MC/MCValue.h"
> > +#include "llvm/Object/ELF.h"
> > +#include "llvm/Support/ErrorHandling.h"
> > +
> > +using namespace llvm;
> > +
> > +const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind
> Kind,
> > +                                       MCContext &Ctx) {
> > +  return new (Ctx) ARM64MCExpr(Expr, Kind);
> > +}
> > +
> > +StringRef ARM64MCExpr::getVariantKindName() const {
> > +  switch (static_cast<uint32_t>(getKind())) {
> > +  case VK_CALL:                return "";
> > +  case VK_LO12:                return ":lo12:";
> > +  case VK_ABS_G3:              return ":abs_g3:";
> > +  case VK_ABS_G2:              return ":abs_g2:";
> > +  case VK_ABS_G2_NC:           return ":abs_g2_nc:";
> > +  case VK_ABS_G1:              return ":abs_g1:";
> > +  case VK_ABS_G1_NC:           return ":abs_g1_nc:";
> > +  case VK_ABS_G0:              return ":abs_g0:";
> > +  case VK_ABS_G0_NC:           return ":abs_g0_nc:";
> > +  case VK_DTPREL_G2:           return ":dtprel_g2:";
> > +  case VK_DTPREL_G1:           return ":dtprel_g1:";
> > +  case VK_DTPREL_G1_NC:        return ":dtprel_g1_nc:";
> > +  case VK_DTPREL_G0:           return ":dtprel_g0:";
> > +  case VK_DTPREL_G0_NC:        return ":dtprel_g0_nc:";
> > +  case VK_DTPREL_LO12:         return ":dtprel_lo12:";
> > +  case VK_DTPREL_LO12_NC:      return ":dtprel_lo12_nc:";
> > +  case VK_TPREL_G2:            return ":tprel_g2:";
> > +  case VK_TPREL_G1:            return ":tprel_g1:";
> > +  case VK_TPREL_G1_NC:         return ":tprel_g1_nc:";
> > +  case VK_TPREL_G0:            return ":tprel_g0:";
> > +  case VK_TPREL_G0_NC:         return ":tprel_g0_nc:";
> > +  case VK_TPREL_LO12:          return ":tprel_lo12:";
> > +  case VK_TPREL_LO12_NC:       return ":tprel_lo12_nc:";
> > +  case VK_TLSDESC_LO12:        return ":tlsdesc_lo12:";
> > +  case VK_ABS_PAGE:            return "";
> > +  case VK_GOT_PAGE:            return ":got:";
> > +  case VK_GOT_LO12:            return ":got_lo12:";
> > +  case VK_GOTTPREL_PAGE:       return ":gottprel:";
> > +  case VK_GOTTPREL_LO12_NC:    return ":gottprel_lo12:";
> > +  case VK_GOTTPREL_G1:         return ":gottprel_g1:";
> > +  case VK_GOTTPREL_G0_NC:      return ":gottprel_g0_nc:";
> > +  case VK_TLSDESC:             return "";
> > +  case VK_TLSDESC_PAGE:        return ":tlsdesc:";
> > +  default:
> > +    llvm_unreachable("Invalid ELF symbol kind");
> > +  }
> > +}
> > +
> > +void ARM64MCExpr::PrintImpl(raw_ostream &OS) const {
> > +  if (getKind() != VK_NONE)
> > +    OS << getVariantKindName();
> > +  OS << *Expr;
> > +}
> > +
> > +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols.
> Perhaps
> > +// that method should be made public?
> > +// FIXME: really do above: now that two backends are using it.
> > +static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
> > +  switch (Value->getKind()) {
> > +  case MCExpr::Target:
> > +    llvm_unreachable("Can't handle nested target expr!");
> > +    break;
> > +
> > +  case MCExpr::Constant:
> > +    break;
> > +
> > +  case MCExpr::Binary: {
> > +    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
> > +    AddValueSymbolsImpl(BE->getLHS(), Asm);
> > +    AddValueSymbolsImpl(BE->getRHS(), Asm);
> > +    break;
> > +  }
> > +
> > +  case MCExpr::SymbolRef:
> > +    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
> > +    break;
> > +
> > +  case MCExpr::Unary:
> > +    AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
> > +    break;
> > +  }
> > +}
> > +
> > +void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
> > +  AddValueSymbolsImpl(getSubExpr(), Asm);
> > +}
> > +
> > +const MCSection *ARM64MCExpr::FindAssociatedSection() const {
> > +  llvm_unreachable("FIXME: what goes here?");
> > +}
> > +
> > +bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
> > +                                            const MCAsmLayout *Layout)
> const {
> > +  if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
> > +    return false;
> > +
> > +  Res =
> > +      MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(),
> getKind());
> > +
> > +  return true;
> > +}
> > +
> > +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr,
> MCAssembler &Asm) {
> > +  switch (Expr->getKind()) {
> > +  case MCExpr::Target:
> > +    llvm_unreachable("Can't handle nested target expression");
> > +    break;
> > +  case MCExpr::Constant:
> > +    break;
> > +
> > +  case MCExpr::Binary: {
> > +    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
> > +    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
> > +    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
> > +    break;
> > +  }
> > +
> > +  case MCExpr::SymbolRef: {
> > +    // We're known to be under a TLS fixup, so any symbol should be
> > +    // modified. There should be only one.
> > +    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
> > +    MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
> > +    MCELF::SetType(SD, ELF::STT_TLS);
> > +    break;
> > +  }
> > +
> > +  case MCExpr::Unary:
> > +    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(),
> Asm);
> > +    break;
> > +  }
> > +}
> > +
> > +void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
> > +  switch (getSymbolLoc(Kind)) {
> > +  default:
> > +    return;
> > +  case VK_DTPREL:
> > +  case VK_GOTTPREL:
> > +  case VK_TPREL:
> > +  case VK_TLSDESC:
> > +    break;
> > +  }
> > +
> > +  fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
> > +}
> >
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,162 @@
> > +//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file describes ARM64-specific MCExprs, used for modifiers like
> > +// ":lo12:" or ":gottprel_g1:".
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#ifndef LLVM_ARM64MCEXPR_H
> > +#define LLVM_ARM64MCEXPR_H
> > +
> > +#include "llvm/MC/MCExpr.h"
> > +#include "llvm/Support/ErrorHandling.h"
> > +
> > +namespace llvm {
> > +
> > +class ARM64MCExpr : public MCTargetExpr {
> > +public:
> > +  enum VariantKind {
> > +    VK_NONE     = 0x000,
> > +
> > +    // Symbol locations specifying (roughly speaking) what calculation
> should be
> > +    // performed to construct the final address for the relocated
> > +    // symbol. E.g. direct, via the GOT, ...
> > +    VK_ABS      = 0x001,
> > +    VK_SABS     = 0x002,
> > +    VK_GOT      = 0x003,
> > +    VK_DTPREL   = 0x004,
> > +    VK_GOTTPREL = 0x005,
> > +    VK_TPREL    = 0x006,
> > +    VK_TLSDESC  = 0x007,
> > +    VK_SymLocBits = 0x00f,
> > +
> > +    // Variants specifying which part of the final address calculation
> is
> > +    // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits
> for a
> > +    // MOVZ/MOVK.
> > +    VK_PAGE     = 0x010,
> > +    VK_PAGEOFF  = 0x020,
> > +    VK_G0       = 0x030,
> > +    VK_G1       = 0x040,
> > +    VK_G2       = 0x050,
> > +    VK_G3       = 0x060,
> > +    VK_AddressFragBits = 0x0f0,
> > +
> > +    // Whether the final relocation is a checked one (where a linker
> should
> > +    // perform a range-check on the final address) or not. Note that
> this field
> > +    // is unfortunately sometimes omitted from the assembly syntax.
> E.g. :lo12:
> > +    // on its own is a non-checked relocation. We side with ELF on being
> > +    // explicit about this!
> > +    VK_NC       = 0x100,
> > +
> > +    // Convenience definitions for referring to specific textual
> representations
> > +    // of relocation specifiers. Note that this means the "_NC" is
> sometimes
> > +    // omitted in line with assembly syntax here (VK_LO12 rather than
> VK_LO12_NC
> > +    // since a user would write ":lo12:").
> > +    VK_CALL              = VK_ABS,
> > +    VK_ABS_PAGE          = VK_ABS      | VK_PAGE,
> > +    VK_ABS_G3            = VK_ABS      | VK_G3,
> > +    VK_ABS_G2            = VK_ABS      | VK_G2,
> > +    VK_ABS_G2_NC         = VK_ABS      | VK_G2      | VK_NC,
> > +    VK_ABS_G1            = VK_ABS      | VK_G1,
> > +    VK_ABS_G1_NC         = VK_ABS      | VK_G1      | VK_NC,
> > +    VK_ABS_G0            = VK_ABS      | VK_G0,
> > +    VK_ABS_G0_NC         = VK_ABS      | VK_G0      | VK_NC,
> > +    VK_LO12              = VK_ABS      | VK_PAGEOFF | VK_NC,
> > +    VK_GOT_LO12          = VK_GOT      | VK_PAGEOFF | VK_NC,
> > +    VK_GOT_PAGE          = VK_GOT      | VK_PAGE,
> > +    VK_DTPREL_G2         = VK_DTPREL   | VK_G2,
> > +    VK_DTPREL_G1         = VK_DTPREL   | VK_G1,
> > +    VK_DTPREL_G1_NC      = VK_DTPREL   | VK_G1      | VK_NC,
> > +    VK_DTPREL_G0         = VK_DTPREL   | VK_G0,
> > +    VK_DTPREL_G0_NC      = VK_DTPREL   | VK_G0      | VK_NC,
> > +    VK_DTPREL_LO12       = VK_DTPREL   | VK_PAGEOFF,
> > +    VK_DTPREL_LO12_NC    = VK_DTPREL   | VK_PAGEOFF | VK_NC,
> > +    VK_GOTTPREL_PAGE     = VK_GOTTPREL | VK_PAGE,
> > +    VK_GOTTPREL_LO12_NC  = VK_GOTTPREL | VK_PAGEOFF | VK_NC,
> > +    VK_GOTTPREL_G1       = VK_GOTTPREL | VK_G1,
> > +    VK_GOTTPREL_G0_NC    = VK_GOTTPREL | VK_G0      | VK_NC,
> > +    VK_TPREL_G2          = VK_TPREL    | VK_G2,
> > +    VK_TPREL_G1          = VK_TPREL    | VK_G1,
> > +    VK_TPREL_G1_NC       = VK_TPREL    | VK_G1      | VK_NC,
> > +    VK_TPREL_G0          = VK_TPREL    | VK_G0,
> > +    VK_TPREL_G0_NC       = VK_TPREL    | VK_G0      | VK_NC,
> > +    VK_TPREL_LO12        = VK_TPREL    | VK_PAGEOFF,
> > +    VK_TPREL_LO12_NC     = VK_TPREL    | VK_PAGEOFF | VK_NC,
> > +    VK_TLSDESC_LO12      = VK_TLSDESC  | VK_PAGEOFF | VK_NC,
> > +    VK_TLSDESC_PAGE      = VK_TLSDESC  | VK_PAGE,
> > +
> > +    VK_INVALID  = 0xfff
> > +  };
> > +
> > +private:
> > +  const MCExpr *Expr;
> > +  const VariantKind Kind;
> > +
> > +  explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind)
> > +    : Expr(Expr), Kind(Kind) {}
> > +
> > +public:
> > +  /// @name Construction
> > +  /// @{
> > +
> > +  static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
> > +                                   MCContext &Ctx);
> > +
> > +  /// @}
> > +  /// @name Accessors
> > +  /// @{
> > +
> > +  /// Get the kind of this expression.
> > +  VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
> > +
> > +  /// Get the expression this modifier applies to.
> > +  const MCExpr *getSubExpr() const { return Expr; }
> > +
> > +  /// @}
> > +  /// @name VariantKind information extractors.
> > +  /// @{
> > +
> > +  static VariantKind getSymbolLoc(VariantKind Kind) {
> > +    return static_cast<VariantKind>(Kind & VK_SymLocBits);
> > +  }
> > +
> > +  static VariantKind getAddressFrag(VariantKind Kind) {
> > +    return static_cast<VariantKind>(Kind & VK_AddressFragBits);
> > +  }
> > +
> > +  static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; }
> > +
> > +  /// @}
> > +
> > +  /// Convert the variant kind into an ELF-appropriate modifier
> > +  /// (e.g. ":got:", ":lo12:").
> > +  StringRef getVariantKindName() const;
> > +
> > +  void PrintImpl(raw_ostream &OS) const;
> > +
> > +  void AddValueSymbols(MCAssembler *) const;
> > +
> > +  const MCSection *FindAssociatedSection() const;
> > +
> > +  bool EvaluateAsRelocatableImpl(MCValue &Res,
> > +                                 const MCAsmLayout *Layout) const;
> > +
> > +  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
> > +
> > +  static bool classof(const MCExpr *E) {
> > +    return E->getKind() == MCExpr::Target;
> > +  }
> > +
> > +  static bool classof(const ARM64MCExpr *) { return true; }
> > +
> > +};
> > +} // end namespace llvm
> > +
> > +#endif
> >
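To make the packed VariantKind layout above concrete, here is a small usage
sketch (mine, not from the patch) using the accessors declared in the header;
the hex values follow directly from the enum definitions:

  ARM64MCExpr::VariantKind K = ARM64MCExpr::VK_DTPREL_G1_NC;           // 0x144
  assert(ARM64MCExpr::getSymbolLoc(K)   == ARM64MCExpr::VK_DTPREL);    // 0x004
  assert(ARM64MCExpr::getAddressFrag(K) == ARM64MCExpr::VK_G1);        // 0x040
  assert(ARM64MCExpr::isNotChecked(K));                                // VK_NC set
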
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,167 @@
> > +//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file provides ARM64 specific target descriptions.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#include "ARM64MCTargetDesc.h"
> > +#include "ARM64ELFStreamer.h"
> > +#include "ARM64MCAsmInfo.h"
> > +#include "InstPrinter/ARM64InstPrinter.h"
> > +#include "llvm/MC/MCCodeGenInfo.h"
> > +#include "llvm/MC/MCInstrInfo.h"
> > +#include "llvm/MC/MCRegisterInfo.h"
> > +#include "llvm/MC/MCStreamer.h"
> > +#include "llvm/MC/MCSubtargetInfo.h"
> > +#include "llvm/Support/ErrorHandling.h"
> > +#include "llvm/Support/TargetRegistry.h"
> > +
> > +#define GET_INSTRINFO_MC_DESC
> > +#include "ARM64GenInstrInfo.inc"
> > +
> > +#define GET_SUBTARGETINFO_MC_DESC
> > +#include "ARM64GenSubtargetInfo.inc"
> > +
> > +#define GET_REGINFO_MC_DESC
> > +#include "ARM64GenRegisterInfo.inc"
> > +
> > +using namespace llvm;
> > +
> > +static MCInstrInfo *createARM64MCInstrInfo() {
> > +  MCInstrInfo *X = new MCInstrInfo();
> > +  InitARM64MCInstrInfo(X);
> > +  return X;
> > +}
> > +
> > +static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT,
> StringRef CPU,
> > +                                                   StringRef FS) {
> > +  MCSubtargetInfo *X = new MCSubtargetInfo();
> > +  InitARM64MCSubtargetInfo(X, TT, CPU, FS);
> > +  return X;
> > +}
> > +
> > +static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) {
> > +  MCRegisterInfo *X = new MCRegisterInfo();
> > +  InitARM64MCRegisterInfo(X, ARM64::LR);
> > +  return X;
> > +}
> > +
> > +static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI,
> > +                                       StringRef TT) {
> > +  Triple TheTriple(TT);
> > +
> > +  MCAsmInfo *MAI;
> > +  if (TheTriple.isOSDarwin())
> > +    MAI = new ARM64MCAsmInfoDarwin();
> > +  else {
> > +    assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
> > +    MAI = new ARM64MCAsmInfoELF();
> > +  }
> > +
> > +  // Initial state of the frame pointer is SP.
> > +  unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true);
> > +  MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
> > +  MAI->addInitialFrameState(Inst);
> > +
> > +  return MAI;
> > +}
> > +
> > +MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
> > +                                        CodeModel::Model CM,
> > +                                        CodeGenOpt::Level OL) {
> > +  Triple TheTriple(TT);
> > +  assert((TheTriple.isOSBinFormatELF() ||
> TheTriple.isOSBinFormatMachO()) &&
> > +         "Only expect Darwin and ELF targets");
> > +
> > +  if (CM == CodeModel::Default)
> > +    CM = CodeModel::Small;
> > +  // The default MCJIT memory managers make no guarantees about where
> they can
> > +  // find an executable page; JITed code needs to be able to refer to
> globals
> > +  // no matter how far away they are.
> > +  else if (CM == CodeModel::JITDefault)
> > +    CM = CodeModel::Large;
> > +  else if (CM != CodeModel::Small && CM != CodeModel::Large)
> > +    report_fatal_error("Only small and large code models are allowed on
> ARM64");
> > +
> > +  // ARM64 Darwin is always PIC.
> > +  if (TheTriple.isOSDarwin())
> > +    RM = Reloc::PIC_;
> > +  // On ELF platforms the default static relocation model has a smart
> enough
> > +  // linker to cope with referencing external symbols defined in a
> shared
> > +  // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
> > +  else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
> > +    RM = Reloc::Static;
> > +
> > +  MCCodeGenInfo *X = new MCCodeGenInfo();
> > +  X->InitMCCodeGenInfo(RM, CM, OL);
> > +  return X;
> > +}
> > +
> > +static MCInstPrinter *createARM64MCInstPrinter(const Target &T,
> > +                                               unsigned SyntaxVariant,
> > +                                               const MCAsmInfo &MAI,
> > +                                               const MCInstrInfo &MII,
> > +                                               const MCRegisterInfo
> &MRI,
> > +                                               const MCSubtargetInfo
> &STI) {
> > +  if (SyntaxVariant == 0)
> > +    return new ARM64InstPrinter(MAI, MII, MRI, STI);
> > +  if (SyntaxVariant == 1)
> > +    return new ARM64AppleInstPrinter(MAI, MII, MRI, STI);
> > +
> > +  return 0;
> > +}
> > +
> > +static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
> > +                                    MCContext &Ctx, MCAsmBackend &TAB,
> > +                                    raw_ostream &OS, MCCodeEmitter
> *Emitter,
> > +                                    const MCSubtargetInfo &STI, bool
> RelaxAll,
> > +                                    bool NoExecStack) {
> > +  Triple TheTriple(TT);
> > +
> > +  if (TheTriple.isOSDarwin())
> > +    return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
> > +                               /*LabelSections*/ true);
> > +
> > +  return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
> NoExecStack);
> > +}
> > +
> > +// Force static initialization.
> > +extern "C" void LLVMInitializeARM64TargetMC() {
> > +  // Register the MC asm info.
> > +  RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo);
> > +
> > +  // Register the MC codegen info.
> > +  TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
> > +                                        createARM64MCCodeGenInfo);
> > +
> > +  // Register the MC instruction info.
> > +  TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
> createARM64MCInstrInfo);
> > +
> > +  // Register the MC register info.
> > +  TargetRegistry::RegisterMCRegInfo(TheARM64Target,
> createARM64MCRegisterInfo);
> > +
> > +  // Register the MC subtarget info.
> > +  TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
> > +                                          createARM64MCSubtargetInfo);
> > +
> > +  // Register the asm backend.
> > +  TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
> createARM64AsmBackend);
> > +
> > +  // Register the MC Code Emitter
> > +  TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
> > +                                        createARM64MCCodeEmitter);
> > +
> > +  // Register the object streamer.
> > +  TargetRegistry::RegisterMCObjectStreamer(TheARM64Target,
> createMCStreamer);
> > +
> > +  // Register the MCInstPrinter.
> > +  TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
> > +                                        createARM64MCInstPrinter);
> > +}
> >
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,62 @@
> > +//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file provides ARM64 specific target descriptions.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#ifndef ARM64MCTARGETDESC_H
> > +#define ARM64MCTARGETDESC_H
> > +
> > +#include "llvm/Support/DataTypes.h"
> > +#include <string>
> > +
> > +namespace llvm {
> > +class MCAsmBackend;
> > +class MCCodeEmitter;
> > +class MCContext;
> > +class MCInstrInfo;
> > +class MCRegisterInfo;
> > +class MCObjectWriter;
> > +class MCSubtargetInfo;
> > +class StringRef;
> > +class Target;
> > +class raw_ostream;
> > +
> > +extern Target TheARM64Target;
> > +
> > +MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII,
> > +                                        const MCRegisterInfo &MRI,
> > +                                        const MCSubtargetInfo &STI,
> > +                                        MCContext &Ctx);
> > +MCAsmBackend *createARM64AsmBackend(const Target &T, const
> MCRegisterInfo &MRI,
> > +                                    StringRef TT, StringRef CPU);
> > +
> > +MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t
> OSABI);
> > +
> > +MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t
> CPUType,
> > +                                            uint32_t CPUSubtype);
> > +
> > +} // End llvm namespace
> > +
> > +// Defines symbolic names for ARM64 registers.  This defines a mapping
> from
> > +// register name to register number.
> > +//
> > +#define GET_REGINFO_ENUM
> > +#include "ARM64GenRegisterInfo.inc"
> > +
> > +// Defines symbolic names for the ARM64 instructions.
> > +//
> > +#define GET_INSTRINFO_ENUM
> > +#include "ARM64GenInstrInfo.inc"
> > +
> > +#define GET_SUBTARGETINFO_ENUM
> > +#include "ARM64GenSubtargetInfo.inc"
> > +
> > +#endif
> >
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,396 @@
> > +//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#include "MCTargetDesc/ARM64FixupKinds.h"
> > +#include "MCTargetDesc/ARM64MCTargetDesc.h"
> > +#include "llvm/MC/MCAssembler.h"
> > +#include "llvm/MC/MCAsmLayout.h"
> > +#include "llvm/MC/MCContext.h"
> > +#include "llvm/MC/MCExpr.h"
> > +#include "llvm/MC/MCFixup.h"
> > +#include "llvm/MC/MCMachObjectWriter.h"
> > +#include "llvm/MC/MCSectionMachO.h"
> > +#include "llvm/MC/MCValue.h"
> > +#include "llvm/ADT/Twine.h"
> > +#include "llvm/Support/ErrorHandling.h"
> > +#include "llvm/Support/MachO.h"
> > +using namespace llvm;
> > +
> > +namespace {
> > +class ARM64MachObjectWriter : public MCMachObjectTargetWriter {
> > +  bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned
> &RelocType,
> > +                                  const MCSymbolRefExpr *Sym,
> > +                                  unsigned &Log2Size, const MCAssembler
> &Asm);
> > +
> > +public:
> > +  ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
> > +      : MCMachObjectTargetWriter(true /* is64Bit */, CPUType,
> CPUSubtype,
> > +                                 /*UseAggressiveSymbolFolding=*/true) {}
> > +
> > +  void RecordRelocation(MachObjectWriter *Writer, const MCAssembler
> &Asm,
> > +                        const MCAsmLayout &Layout, const MCFragment
> *Fragment,
> > +                        const MCFixup &Fixup, MCValue Target,
> > +                        uint64_t &FixedValue);
> > +};
> > +}
> > +
> > +bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
> > +    const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr
> *Sym,
> > +    unsigned &Log2Size, const MCAssembler &Asm) {
> > +  RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED);
> > +  Log2Size = ~0U;
> > +
> > +  switch ((unsigned)Fixup.getKind()) {
> > +  default:
> > +    return false;
> > +
> > +  case FK_Data_1:
> > +    Log2Size = llvm::Log2_32(1);
> > +    return true;
> > +  case FK_Data_2:
> > +    Log2Size = llvm::Log2_32(2);
> > +    return true;
> > +  case FK_Data_4:
> > +    Log2Size = llvm::Log2_32(4);
> > +    if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
> > +      RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
> > +    return true;
> > +  case FK_Data_8:
> > +    Log2Size = llvm::Log2_32(8);
> > +    if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
> > +      RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
> > +    return true;
> > +  case ARM64::fixup_arm64_add_imm12:
> > +  case ARM64::fixup_arm64_ldst_imm12_scale1:
> > +  case ARM64::fixup_arm64_ldst_imm12_scale2:
> > +  case ARM64::fixup_arm64_ldst_imm12_scale4:
> > +  case ARM64::fixup_arm64_ldst_imm12_scale8:
> > +  case ARM64::fixup_arm64_ldst_imm12_scale16:
> > +    Log2Size = llvm::Log2_32(4);
> > +    switch (Sym->getKind()) {
> > +    default:
> > +      assert(0 && "Unexpected symbol reference variant kind!");
> > +    case MCSymbolRefExpr::VK_PAGEOFF:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
> > +      return true;
> > +    case MCSymbolRefExpr::VK_GOTPAGEOFF:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12);
> > +      return true;
> > +    case MCSymbolRefExpr::VK_TLVPPAGEOFF:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12);
> > +      return true;
> > +    }
> > +  case ARM64::fixup_arm64_pcrel_adrp_imm21:
> > +    Log2Size = llvm::Log2_32(4);
> > +    // This encompasses the relocation for the whole 21-bit value.
> > +    switch (Sym->getKind()) {
> > +    default:
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "ADR/ADRP relocations must be GOT
> relative");
> > +    case MCSymbolRefExpr::VK_PAGE:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
> > +      return true;
> > +    case MCSymbolRefExpr::VK_GOTPAGE:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21);
> > +      return true;
> > +    case MCSymbolRefExpr::VK_TLVPPAGE:
> > +      RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21);
> > +      return true;
> > +    }
> > +    return true;
> > +  case ARM64::fixup_arm64_pcrel_branch26:
> > +  case ARM64::fixup_arm64_pcrel_call26:
> > +    Log2Size = llvm::Log2_32(4);
> > +    RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
> > +    return true;
> > +  }
> > +}
> > +
> > +void ARM64MachObjectWriter::RecordRelocation(
> > +    MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout
> &Layout,
> > +    const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
> > +    uint64_t &FixedValue) {
> > +  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
> > +
> > +  // See <reloc.h>.
> > +  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment);
> > +  unsigned Log2Size = 0;
> > +  int64_t Value = 0;
> > +  unsigned Index = 0;
> > +  unsigned IsExtern = 0;
> > +  unsigned Type = 0;
> > +  unsigned Kind = Fixup.getKind();
> > +
> > +  FixupOffset += Fixup.getOffset();
> > +
> > +  // ARM64 pcrel relocation addends do not include the section offset.
> > +  if (IsPCRel)
> > +    FixedValue += FixupOffset;
> > +
> > +  // ADRP fixups use relocations for the whole symbol value and only
> > +  // put the addend in the instruction itself. Clear out any value the
> > +  // generic code figured out from the symbol definition.
> > +  if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 ||
> > +      Kind == ARM64::fixup_arm64_pcrel_imm19)
> > +    FixedValue = 0;
> > +
> > +  // imm19 relocations are for conditional branches, which require
> > +  // assembler local symbols. If we got here, that's not what we have,
> > +  // so complain loudly.
> > +  if (Kind == ARM64::fixup_arm64_pcrel_imm19) {
> > +    Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                "conditional branch requires
> assembler-local"
> > +                                " label. '" +
> > +
>  Target.getSymA()->getSymbol().getName() +
> > +                                    "' is external.");
> > +    return;
> > +  }
> > +
> > +  // 14-bit branch relocations should only target internal labels, and
> so
> > +  // should never get here.
> > +  if (Kind == ARM64::fixup_arm64_pcrel_branch14) {
> > +    Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                "Invalid relocation on conditional
> branch!");
> > +    return;
> > +  }
> > +
> > +  if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(),
> Log2Size,
> > +                                  Asm)) {
> > +    Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup
> kind!");
> > +    return;
> > +  }
> > +
> > +  Value = Target.getConstant();
> > +
> > +  if (Target.isAbsolute()) { // constant
> > +    // FIXME: Should this always be extern?
> > +    // SymbolNum of 0 indicates the absolute section.
> > +    Type = MachO::ARM64_RELOC_UNSIGNED;
> > +    Index = 0;
> > +
> > +    if (IsPCRel) {
> > +      IsExtern = 1;
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "PC relative absolute relocation!");
> > +
> > +      // FIXME: x86_64 sets the type to a branch reloc here. Should we
> do
> > +      // something similar?
> > +    }
> > +  } else if (Target.getSymB()) { // A - B + constant
> > +    const MCSymbol *A = &Target.getSymA()->getSymbol();
> > +    MCSymbolData &A_SD = Asm.getSymbolData(*A);
> > +    const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
> > +
> > +    const MCSymbol *B = &Target.getSymB()->getSymbol();
> > +    MCSymbolData &B_SD = Asm.getSymbolData(*B);
> > +    const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
> > +
> > +    // Check for "_foo@got - .", which comes through here as:
> > +    // Ltmp0:
> > +    //    ... _foo@got - Ltmp0
> > +    if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT &&
> > +        Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None &&
> > +        Layout.getSymbolOffset(&B_SD) ==
> > +            Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
> > +      // SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
> > +      Index = A_Base->getIndex();
> > +      IsExtern = 1;
> > +      Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
> > +      IsPCRel = 1;
> > +      MachO::any_relocation_info MRE;
> > +      MRE.r_word0 = FixupOffset;
> > +      MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
> > +                     (IsExtern << 27) | (Type << 28));
> > +      Writer->addRelocation(Fragment->getParent(), MRE);
> > +      return;
> > +    } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None
> ||
> > +               Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
> > +      // Otherwise, neither symbol can be modified.
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "unsupported relocation of modified
> symbol");
> > +
> > +    // We don't support PCrel relocations of differences.
> > +    if (IsPCRel)
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "unsupported pc-relative relocation
> of "
> > +                                  "difference");
> > +
> > +    // ARM64 always uses external relocations. If there is no symbol to
> use as
> > +    // a base address (a local symbol with no preceding non-local
> symbol),
> > +    // error out.
> > +    //
> > +    // FIXME: We should probably just synthesize an external symbol and
> use
> > +    // that.
> > +    if (!A_Base)
> > +      Asm.getContext().FatalError(
> > +          Fixup.getLoc(),
> > +          "unsupported relocation of local symbol '" + A->getName() +
> > +              "'. Must have non-local symbol earlier in section.");
> > +    if (!B_Base)
> > +      Asm.getContext().FatalError(
> > +          Fixup.getLoc(),
> > +          "unsupported relocation of local symbol '" + B->getName() +
> > +              "'. Must have non-local symbol earlier in section.");
> > +
> > +    if (A_Base == B_Base && A_Base)
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "unsupported relocation with
> identical base");
> > +
> > +    Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
> > +                                                   &A_SD, Layout)) -
> > +             (A_Base == NULL || A_Base->getFragment() == NULL
> > +                  ? 0
> > +                  : Writer->getSymbolAddress(A_Base, Layout));
> > +    Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
> > +                                                   &B_SD, Layout)) -
> > +             (B_Base == NULL || B_Base->getFragment() == NULL
> > +                  ? 0
> > +                  : Writer->getSymbolAddress(B_Base, Layout));
> > +
> > +    Index = A_Base->getIndex();
> > +    IsExtern = 1;
> > +    Type = MachO::ARM64_RELOC_UNSIGNED;
> > +
> > +    MachO::any_relocation_info MRE;
> > +    MRE.r_word0 = FixupOffset;
> > +    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
> > +                   (IsExtern << 27) | (Type << 28));
> > +    Writer->addRelocation(Fragment->getParent(), MRE);
> > +
> > +    Index = B_Base->getIndex();
> > +    IsExtern = 1;
> > +    Type = MachO::ARM64_RELOC_SUBTRACTOR;
> > +  } else { // A + constant
> > +    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
> > +    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
> > +    const MCSymbolData *Base = Asm.getAtom(&SD);
> > +    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
> > +        Fragment->getParent()->getSection());
> > +
> > +    // If the symbol is a variable and we weren't able to get a Base
> for it
> > +    // (i.e., it's not in the symbol table associated with a section)
> resolve
> > +    // the relocation based on its expansion instead.
> > +    if (Symbol->isVariable() && !Base) {
> > +      // If the evaluation is an absolute value, just use that directly
> > +      // to keep things easy.
> > +      int64_t Res;
> > +      if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
> > +              Res, Layout, Writer->getSectionAddressMap())) {
> > +        FixedValue = Res;
> > +        return;
> > +      }
> > +
> > +      // FIXME: Will the Target we already have ever have any data in it
> > +      // we need to preserve and merge with the new Target? How about
> > +      // the FixedValue?
> > +      if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target,
> &Layout))
> > +        Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                    "unable to resolve variable '" +
> > +                                        Symbol->getName() + "'");
> > +      return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup,
> Target,
> > +                              FixedValue);
> > +    }
> > +
> > +    // Relocations inside debug sections always use local relocations
> when
> > +    // possible. This seems to be done because the debugger doesn't
> fully
> > +    // understand relocation entries and expects to find values that
> > +    // have already been fixed up.
> > +    if (Symbol->isInSection()) {
> > +      if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
> > +        Base = 0;
> > +    }
> > +
> > +    // ARM64 uses external relocations as much as possible. For debug
> sections,
> > +    // and for pointer-sized relocations (.quad), we allow section
> relocations.
> > +    // It's code sections that run into trouble.
> > +    if (Base) {
> > +      Index = Base->getIndex();
> > +      IsExtern = 1;
> > +
> > +      // Add the local offset, if needed.
> > +      if (Base != &SD)
> > +        Value += Layout.getSymbolOffset(&SD) -
> Layout.getSymbolOffset(Base);
> > +    } else if (Symbol->isInSection()) {
> > +      // Pointer-sized relocations can use a local relocation.
> Otherwise,
> > +      // we have to be in a debug info section.
> > +      if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
> > +        Asm.getContext().FatalError(
> > +            Fixup.getLoc(),
> > +            "unsupported relocation of local symbol '" +
> Symbol->getName() +
> > +                "'. Must have non-local symbol earlier in section.");
> > +      // Adjust the relocation to be section-relative.
> > +      // The index is the section ordinal (1-based).
> > +      const MCSectionData &SymSD =
> > +          Asm.getSectionData(SD.getSymbol().getSection());
> > +      Index = SymSD.getOrdinal() + 1;
> > +      IsExtern = 0;
> > +      Value += Writer->getSymbolAddress(&SD, Layout);
> > +
> > +      if (IsPCRel)
> > +        Value -= Writer->getFragmentAddress(Fragment, Layout) +
> > +                 Fixup.getOffset() + (1 << Log2Size);
> > +    } else {
> > +      // Resolve constant variables.
> > +      if (SD.getSymbol().isVariable()) {
> > +        int64_t Res;
> > +        if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
> > +                Res, Layout, Writer->getSectionAddressMap())) {
> > +          FixedValue = Res;
> > +          return;
> > +        }
> > +      }
> > +      Asm.getContext().FatalError(Fixup.getLoc(),
> > +                                  "unsupported relocation of variable
> '" +
> > +                                      Symbol->getName() + "'");
> > +    }
> > +  }
> > +
> > +  // If the relocation kind is Branch26, Page21, or Pageoff12, any
> addend
> > +  // is represented via an Addend relocation, not encoded directly into
> > +  // the instruction.
> > +  if ((Type == MachO::ARM64_RELOC_BRANCH26 ||
> > +       Type == MachO::ARM64_RELOC_PAGE21 ||
> > +       Type == MachO::ARM64_RELOC_PAGEOFF12) &&
> > +      Value) {
> > +    assert((Value & 0xff000000) == 0 && "Addend relocation out of
> range!");
> > +
> > +    MachO::any_relocation_info MRE;
> > +    MRE.r_word0 = FixupOffset;
> > +    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
> > +                   (IsExtern << 27) | (Type << 28));
> > +    Writer->addRelocation(Fragment->getParent(), MRE);
> > +
> > +    // Now set up the Addend relocation.
> > +    Type = MachO::ARM64_RELOC_ADDEND;
> > +    Index = Value;
> > +    IsPCRel = 0;
> > +    Log2Size = 2;
> > +    IsExtern = 0;
> > +
> > +    // Put zero into the instruction itself. The addend is in the
> relocation.
> > +    Value = 0;
> > +  }
> > +
> > +  // If there's any addend left to handle, encode it in the instruction.
> > +  FixedValue = Value;
> > +
> > +  // struct relocation_info (8 bytes)
> > +  MachO::any_relocation_info MRE;
> > +  MRE.r_word0 = FixupOffset;
> > +  MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
> > +                 (IsExtern << 27) | (Type << 28));
> > +  Writer->addRelocation(Fragment->getParent(), MRE);
> > +}
> > +
> > +MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS,
> > +                                                  uint32_t CPUType,
> > +                                                  uint32_t CPUSubtype) {
> > +  return createMachObjectWriter(new ARM64MachObjectWriter(CPUType,
> CPUSubtype),
> > +                                OS, /*IsLittleEndian=*/true);
> > +}
> >
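
For anyone reading the relocation writer above for the first time: every
record packs r_word1 the same way (a 24-bit symbol/section index, then
single-bit pcrel and extern flags plus small length and type fields), and a
BRANCH26/PAGE21/PAGEOFF12 fixup with a non-zero addend gets a paired
ARM64_RELOC_ADDEND entry that carries the addend in its index field while the
instruction itself is zeroed, as the comments in RecordRelocation note. A
rough standalone C++ sketch of just the packing, with made-up index and type
values rather than anything taken from the patch:

  // Sketch only; mirrors the (Index | IsPCRel | Log2Size | IsExtern | Type)
  // expression used in RecordRelocation.
  #include <cstdint>
  #include <cstdio>

  static uint32_t packWord1(uint32_t Index, unsigned IsPCRel,
                            unsigned Log2Size, unsigned IsExtern,
                            unsigned Type) {
    return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
           (IsExtern << 27) | (Type << 28);
  }

  int main() {
    // Hypothetical entry: external, PC-relative, 4-byte (Log2Size == 2)
    // relocation of type 3 against symbol-table index 5.
    uint32_t W = packWord1(5, 1, 2, 1, 3);
    std::printf("r_word1 = 0x%08x\n", (unsigned)W);   // 0x3d000005
    std::printf("index   = %u\n", W & 0x00ffffffu);   // 5
    std::printf("type    = %u\n", (W >> 28) & 0xfu);  // 3
    return 0;
  }
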
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,14 @@
> > +add_llvm_library(LLVMARM64Desc
> > +  ARM64AsmBackend.cpp
> > +  ARM64ELFObjectWriter.cpp
> > +  ARM64ELFStreamer.cpp
> > +  ARM64MCAsmInfo.cpp
> > +  ARM64MCCodeEmitter.cpp
> > +  ARM64MCExpr.cpp
> > +  ARM64MCTargetDesc.cpp
> > +  ARM64MachObjectWriter.cpp
> > +)
> > +add_dependencies(LLVMARM64Desc ARM64CommonTableGen)
> > +
> > +# Hack: we need to include 'main' target directory to grab private
> headers
> > +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..
> ${CMAKE_CURRENT_BINARY_DIR}/..)
> >
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,24 @@
> > +;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf
> -*--===;
> > +;
> > +;                     The LLVM Compiler Infrastructure
> > +;
> > +; This file is distributed under the University of Illinois Open Source
> > +; License. See LICENSE.TXT for details.
> > +;
> >
> +;===------------------------------------------------------------------------===;
> > +;
> > +; This is an LLVMBuild description file for the components in this
> subdirectory.
> > +;
> > +; For more information on the LLVMBuild system, please see:
> > +;
> > +;   http://llvm.org/docs/LLVMBuild.html
> > +;
> >
> +;===------------------------------------------------------------------------===;
> > +
> > +[component_0]
> > +type = Library
> > +name = ARM64Desc
> > +parent = ARM64
> > +required_libraries = ARM64AsmPrinter ARM64Info MC Support
> > +add_to_library_groups = ARM64
> > +
> >
> > Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/Makefile
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/Makefile?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/MCTargetDesc/Makefile (added)
> > +++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/Makefile Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,16 @@
> > +##===- lib/Target/ARM64/MCTargetDesc/Makefile --------------*- Makefile
> -*-===##
> > +#
> > +#                     The LLVM Compiler Infrastructure
> > +#
> > +# This file is distributed under the University of Illinois Open Source
> > +# License. See LICENSE.TXT for details.
> > +#
> >
> +##===----------------------------------------------------------------------===##
> > +
> > +LEVEL = ../../../..
> > +LIBRARYNAME = LLVMARM64Desc
> > +
> > +# Hack: we need to include 'main' target directory to grab private
> headers
> > +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
> > +
> > +include $(LEVEL)/Makefile.common
> >
> > Added: llvm/trunk/lib/Target/ARM64/Makefile
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Makefile?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/Makefile (added)
> > +++ llvm/trunk/lib/Target/ARM64/Makefile Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,25 @@
> > +##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile
> -*-===##
> > +#
> > +#                     The LLVM Compiler Infrastructure
> > +#
> > +# This file is distributed under the University of Illinois Open Source
> > +# License. See LICENSE.TXT for details.
> > +#
> >
> +##===----------------------------------------------------------------------===##
> > +
> > +LEVEL = ../../..
> > +LIBRARYNAME = LLVMARM64CodeGen
> > +TARGET = ARM64
> > +
> > +# Make sure that tblgen is run, first thing.
> > +BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \
> > +               ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \
> > +               ARM64GenDAGISel.inc \
> > +               ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \
> > +               ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \
> > +               ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \
> > +               ARM64GenMCPseudoLowering.inc
> > +
> > +DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc
> > +
> > +include $(LEVEL)/Makefile.common
> >
> > Added: llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp (added)
> > +++ llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation
> -----------------===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> >
> +//===----------------------------------------------------------------------===//
> > +
> > +#include "llvm/ADT/Triple.h"
> > +#include "llvm/Support/TargetRegistry.h"
> > +using namespace llvm;
> > +
> > +namespace llvm {
> > +Target TheARM64Target;
> > +} // end namespace llvm
> > +
> > +extern "C" void LLVMInitializeARM64TargetInfo() {
> > +  RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64Target,
> "arm64",
> > +                                                   "ARM64");
> > +}
> >
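
The RegisterTarget object above is what makes the backend discoverable by
triple. A minimal sketch (not from the patch) of how a driver would consume
that registration, assuming it links against the new LLVMARM64Info library;
the triple string is only an example:

  // Sketch: look up the freshly registered arm64 target by triple.
  #include "llvm/Support/TargetRegistry.h"
  #include <cstdio>
  #include <string>

  // Declared extern "C" in ARM64TargetInfo.cpp; normally reached through
  // InitializeAllTargetInfos() rather than being called directly.
  extern "C" void LLVMInitializeARM64TargetInfo();

  int main() {
    LLVMInitializeARM64TargetInfo();
    std::string Err;
    const llvm::Target *T =
        llvm::TargetRegistry::lookupTarget("arm64-apple-darwin", Err);
    if (!T) {
      std::fprintf(stderr, "arm64 target not found: %s\n", Err.c_str());
      return 1;
    }
    std::printf("arm64 target registered\n");
    return 0;
  }
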
> > Added: llvm/trunk/lib/Target/ARM64/TargetInfo/CMakeLists.txt
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/TargetInfo/CMakeLists.txt?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/TargetInfo/CMakeLists.txt (added)
> > +++ llvm/trunk/lib/Target/ARM64/TargetInfo/CMakeLists.txt Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,7 @@
> > +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
> ${CMAKE_CURRENT_SOURCE_DIR}/.. )
> > +
> > +add_llvm_library(LLVMARM64Info
> > +  ARM64TargetInfo.cpp
> > +  )
> > +
> > +add_dependencies(LLVMARM64Info ARM64CommonTableGen)
> >
> > Added: llvm/trunk/lib/Target/ARM64/TargetInfo/LLVMBuild.txt
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/TargetInfo/LLVMBuild.txt?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/TargetInfo/LLVMBuild.txt (added)
> > +++ llvm/trunk/lib/Target/ARM64/TargetInfo/LLVMBuild.txt Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,24 @@
> > +;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf
> -*--===;
> > +;
> > +;                     The LLVM Compiler Infrastructure
> > +;
> > +; This file is distributed under the University of Illinois Open Source
> > +; License. See LICENSE.TXT for details.
> > +;
> >
> +;===------------------------------------------------------------------------===;
> > +;
> > +; This is an LLVMBuild description file for the components in this
> subdirectory.
> > +;
> > +; For more information on the LLVMBuild system, please see:
> > +;
> > +;   http://llvm.org/docs/LLVMBuild.html
> > +;
> >
> +;===------------------------------------------------------------------------===;
> > +
> > +[component_0]
> > +type = Library
> > +name = ARM64Info
> > +parent = ARM64
> > +required_libraries = MC Support
> > +add_to_library_groups = ARM64
> > +
> >
> > Added: llvm/trunk/lib/Target/ARM64/TargetInfo/Makefile
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/TargetInfo/Makefile?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM64/TargetInfo/Makefile (added)
> > +++ llvm/trunk/lib/Target/ARM64/TargetInfo/Makefile Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,15 @@
> > +##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile
> -*-===##
> > +#
> > +#                     The LLVM Compiler Infrastructure
> > +#
> > +# This file is distributed under the University of Illinois Open Source
> > +# License. See LICENSE.TXT for details.
> > +#
> >
> +##===----------------------------------------------------------------------===##
> > +LEVEL = ../../../..
> > +LIBRARYNAME = LLVMARM64Info
> > +
> > +# Hack: we need to include 'main' target directory to grab private
> headers
> > +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
> > +
> > +include $(LEVEL)/Makefile.common
> >
> > Modified: llvm/trunk/lib/Target/LLVMBuild.txt
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/LLVMBuild.txt?rev=205090&r1=205089&r2=205090&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/LLVMBuild.txt (original)
> > +++ llvm/trunk/lib/Target/LLVMBuild.txt Sat Mar 29 05:18:08 2014
> > @@ -16,7 +16,7 @@
> >
>  ;===------------------------------------------------------------------------===;
> >
> >  [common]
> > -subdirectories = AArch64 ARM CppBackend Hexagon MSP430 NVPTX Mips
> PowerPC R600 Sparc SystemZ X86 XCore
> > +subdirectories = AArch64 ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips
> PowerPC R600 Sparc SystemZ X86 XCore
> >
> >  ; This is a special group whose required libraries are extended (by
> llvm-build)
> >  ; with the best execution engine (the native JIT, if available, or the
> >
> > Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=205090&r1=205089&r2=205090&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
> > +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sat Mar
> 29 05:18:08 2014
> > @@ -654,7 +654,9 @@ Instruction *InstCombiner::visitCallInst
> >    }
> >
> >    case Intrinsic::arm_neon_vmulls:
> > -  case Intrinsic::arm_neon_vmullu: {
> > +  case Intrinsic::arm_neon_vmullu:
> > +  case Intrinsic::arm64_neon_smull:
> > +  case Intrinsic::arm64_neon_umull: {
> >      Value *Arg0 = II->getArgOperand(0);
> >      Value *Arg1 = II->getArgOperand(1);
> >
> > @@ -664,7 +666,8 @@ Instruction *InstCombiner::visitCallInst
> >      }
> >
> >      // Check for constant LHS & RHS - in this case we just simplify.
> > -    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
> > +    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
> > +                 II->getIntrinsicID() == Intrinsic::arm64_neon_umull);
> >      VectorType *NewVT = cast<VectorType>(II->getType());
> >      if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
> >        if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
> >
> > Added: llvm/trunk/test/Analysis/CostModel/ARM64/lit.local.cfg
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM64/lit.local.cfg?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/Analysis/CostModel/ARM64/lit.local.cfg (added)
> > +++ llvm/trunk/test/Analysis/CostModel/ARM64/lit.local.cfg Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,3 @@
> > +targets = set(config.root.targets_to_build.split())
> > +if not 'ARM64' in targets:
> > +    config.unsupported = True
> >
> > Added: llvm/trunk/test/Analysis/CostModel/ARM64/select.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM64/select.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/Analysis/CostModel/ARM64/select.ll (added)
> > +++ llvm/trunk/test/Analysis/CostModel/ARM64/select.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,38 @@
> > +; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios
> -mcpu=cyclone | FileCheck %s
> > +target datalayout =
> "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
> > +
> > +; CHECK-LABEL: select
> > +define void @select() {
> > +    ; Scalar values
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v1 = select i1 undef, i8 undef, i8 undef
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v2 = select i1 undef, i16 undef, i16 undef
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v3 = select i1 undef, i32 undef, i32 undef
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v4 = select i1 undef, i64 undef, i64 undef
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v5 = select i1 undef, float undef, float undef
> > +  ; CHECK: cost of 1 {{.*}} select
> > +  %v6 = select i1 undef, double undef, double undef
> > +
> > +  ; Vector values - check for vectors that have a high cost because
> they end up
> > +  ; scalarized.
> > +  ; CHECK: cost of 320 {{.*}} select
> > +  %v13b = select <16 x i1>  undef, <16 x i16> undef, <16 x i16> undef
> > +
> > +  ; CHECK: cost of 160 {{.*}} select
> > +  %v15b = select <8 x i1>  undef, <8 x i32> undef, <8 x i32> undef
> > +  ; CHECK: cost of 320 {{.*}} select
> > +  %v15c = select <16 x i1>  undef, <16 x i32> undef, <16 x i32> undef
> > +
> > +  ; CHECK: cost of 80 {{.*}} select
> > +  %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
> > +  ; CHECK: cost of 160 {{.*}} select
> > +  %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
> > +  ; CHECK: cost of 320 {{.*}} select
> > +  %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
> > +
> > +    ret void
> > +}
> >
> > Added: llvm/trunk/test/Analysis/CostModel/ARM64/store.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM64/store.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/Analysis/CostModel/ARM64/store.ll (added)
> > +++ llvm/trunk/test/Analysis/CostModel/ARM64/store.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,22 @@
> > +; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios
> -mcpu=cyclone | FileCheck %s
> > +target datalayout =
> "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
> > +; CHECK-LABEL: store
> > +define void @store() {
> > +    ; Stores of <2 x i64> should be expensive because we don't split
> them and
> > +    ; and unaligned 16b stores have bad performance.
> > +    ; CHECK: cost of 12 {{.*}} store
> > +    store <2 x i64> undef, <2 x i64> * undef
> > +
> > +    ; We scalarize the loads/stores because there is no vector register
> name for
> > +    ; these types (they get extended to v.4h/v.2s).
> > +    ; CHECK: cost of 16 {{.*}} store
> > +    store <2 x i8> undef, <2 x i8> * undef
> > +    ; CHECK: cost of 64 {{.*}} store
> > +    store <4 x i8> undef, <4 x i8> * undef
> > +    ; CHECK: cost of 16 {{.*}} load
> > +    load <2 x i8> * undef
> > +    ; CHECK: cost of 64 {{.*}} load
> > +    load <4 x i8> * undef
> > +
> > +    ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,47 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin
> > +
> > +; Can't copy or spill / restore CPSR.
> > +; rdar://9105206
> > +
> > +define fastcc void @t() ssp align 2 {
> > +entry:
> > +  br i1 undef, label %bb3.i, label %bb2.i
> > +
> > +bb2.i:                                            ; preds = %entry
> > +  br label %bb3.i
> > +
> > +bb3.i:                                            ; preds = %bb2.i,
> %entry
> > +  br i1 undef, label
> %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71, label %bb.i69
> > +
> > +bb.i69:                                           ; preds = %bb3.i
> > +  br label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
> > +
> > +_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71: ; preds =
> %bb.i69, %bb3.i
> > +  %0 = select i1 undef, float 0.000000e+00, float undef
> > +  %1 = fdiv float %0, undef
> > +  %2 = fcmp ult float %1, 0xBF847AE140000000
> > +  %storemerge9 = select i1 %2, float %1, float 0.000000e+00
> > +  store float %storemerge9, float* undef, align 4
> > +  br i1 undef, label %bb42, label %bb47
> > +
> > +bb42:                                             ; preds =
> %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
> > +  br i1 undef, label %bb46, label %bb53
> > +
> > +bb46:                                             ; preds = %bb42
> > +  br label %bb48
> > +
> > +bb47:                                             ; preds =
> %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
> > +  br label %bb48
> > +
> > +bb48:                                             ; preds = %bb47, %bb46
> > +  br i1 undef, label %bb1.i14, label %bb.i13
> > +
> > +bb.i13:                                           ; preds = %bb48
> > +  br label %bb1.i14
> > +
> > +bb1.i14:                                          ; preds = %bb.i13,
> %bb48
> > +  br label %bb53
> > +
> > +bb53:                                             ; preds = %bb1.i14,
> %bb42
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,45 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin
> > +
> > +; rdar://9146594
> > +
> > +define void @drt_vsprintf() nounwind ssp {
> > +entry:
> > +  %do_tab_convert = alloca i32, align 4
> > +  br i1 undef, label %if.then24, label %if.else295, !dbg !13
> > +
> > +if.then24:                                        ; preds = %entry
> > +  unreachable
> > +
> > +if.else295:                                       ; preds = %entry
> > +  call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert},
> metadata !16), !dbg !18
> > +  store i32 0, i32* %do_tab_convert, align 4, !dbg !19
> > +  unreachable
> > +}
> > +
> > +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
> > +
> > +!llvm.dbg.gv = !{!0}
> > +!llvm.dbg.sp = !{!1, !7, !10, !11, !12}
> > +
> > +!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive",
> metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32
> 1, i32 1, null, null} ; [ DW_TAG_variable ]
> > +!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata
> !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata
> !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null,
> null, null, i32 0} ; [ DW_TAG_subprogram ]
> > +!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
> > +!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang
> version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/
> c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0,
> metadata !21, metadata !21, null, null, null, metadata !""} ; [
> DW_TAG_compile_unit ]
> > +!4 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"",
> i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, i32 0} ; [
> DW_TAG_subroutine_type ]
> > +!5 = metadata !{metadata !6}
> > +!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0,
> i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
> > +!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata
> !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1
> true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null,
> null, i32 0} ; [ DW_TAG_subprogram ]
> > +!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"",
> i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [
> DW_TAG_subroutine_type ]
> > +!9 = metadata !{null}
> > +!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata
> !"print_double", metadata !"print_double", metadata !"", i32 203, metadata
> !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null,
> null, null, i32 0} ; [ DW_TAG_subprogram ]
> > +!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata
> !"print_number", metadata !"print_number", metadata !"", i32 75, metadata
> !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null,
> null, null, i32 0} ; [ DW_TAG_subprogram ]
> > +!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata
> !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1
> true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null,
> null, i32 0} ; [ DW_TAG_subprogram ]
> > +!13 = metadata !{i32 653, i32 5, metadata !14, null}
> > +!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32
> 35, i32 2} ; [ DW_TAG_lexical_block ]
> > +!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1,
> i32 0} ; [ DW_TAG_lexical_block ]
> > +!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert",
> metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
> > +!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32
> 12, i32 33} ; [ DW_TAG_lexical_block ]
> > +!18 = metadata !{i32 853, i32 11, metadata !17, null}
> > +!19 = metadata !{i32 853, i32 29, metadata !17, null}
> > +!20 = metadata !{metadata !"print.i", metadata
> !"/Volumes/Ebi/echeng/radars/r9146594"}
> > +!21 = metadata !{i32 0}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll
> (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll
> Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,12 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +define void @foo(i64 %val) {
> > +; CHECK: foo
> > +;   The stack frame store is not 64-bit aligned. Make sure we use an
> > +;   instruction that can handle that.
> > +; CHECK: stur x0, [sp, #20]
> > +  %a = alloca [49 x i32], align 4
> > +  %p32 = getelementptr inbounds [49 x i32]* %a, i64 0, i64 2
> > +  %p = bitcast i32* %p32 to i64*
> > +  store i64 %val, i64* %p, align 8
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,26 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-iOS5.0
> > +
> > +; CPSR is not allocatable, so the fast register allocator wouldn't mark it killed.
> > +; rdar://9313272
> > +
> > +define hidden void @t() nounwind {
> > +entry:
> > +  %cmp = icmp eq i32* null, undef
> > +  %frombool = zext i1 %cmp to i8
> > +  store i8 %frombool, i8* undef, align 1
> > +  %tmp4 = load i8* undef, align 1
> > +  %tobool = trunc i8 %tmp4 to i1
> > +  br i1 %tobool, label %land.lhs.true, label %if.end
> > +
> > +land.lhs.true:                                    ; preds = %entry
> > +  unreachable
> > +
> > +if.end:                                           ; preds = %entry
> > +  br i1 undef, label %land.lhs.true14, label %if.end33
> > +
> > +land.lhs.true14:                                  ; preds = %if.end
> > +  unreachable
> > +
> > +if.end33:                                         ; preds = %if.end
> > +  unreachable
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,31 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
> > +
> > +; Can't fold the increment by 1<<12 into a post-increment load
> > +; rdar://10301335
> > +
> > +@test_data = common global i32 0, align 4
> > +
> > +define void @t() nounwind ssp {
> > +; CHECK-LABEL: t:
> > +entry:
> > +  br label %for.body
> > +
> > +for.body:
> > +; CHECK: for.body
> > +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
> > +; CHECK: add x[[REG:[0-9]+]],
> > +; CHECK:                      x[[REG]], #4096
> > +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
> > +  %0 = shl nsw i64 %indvars.iv, 12
> > +  %add = add nsw i64 %0, 34628173824
> > +  %1 = inttoptr i64 %add to i32*
> > +  %2 = load volatile i32* %1, align 4096
> > +  store volatile i32 %2, i32* @test_data, align 4
> > +  %indvars.iv.next = add i64 %indvars.iv, 1
> > +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> > +  %exitcond = icmp eq i32 %lftr.wideiv, 200
> > +  br i1 %exitcond, label %for.end, label %for.body
> > +
> > +for.end:
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll
> (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll Sat
> Mar 29 05:18:08 2014
> > @@ -0,0 +1,40 @@
> > +; RUN: llc < %s -march=arm64
> > +
> > +; The target lowering for integer comparisons was replacing some DAG
> nodes
> > +; during operation legalization, which resulted in dangling pointers,
> > +; cycles in DAGs, and eventually crashes.  This is the testcase for
> > +; one of those crashes. (rdar://10653656)
> > +
> > +define void @test(i1 zeroext %IsArrow) nounwind ssp align 2 {
> > +entry:
> > +  br i1 undef, label %return, label %lor.lhs.false
> > +
> > +lor.lhs.false:
> > +  br i1 undef, label %return, label %if.end
> > +
> > +if.end:
> > +  %tmp.i = load i64* undef, align 8
> > +  %and.i.i.i = and i64 %tmp.i, -16
> > +  br i1 %IsArrow, label %if.else_crit_edge, label %if.end32
> > +
> > +if.else_crit_edge:
> > +  br i1 undef, label %if.end32, label %return
> > +
> > +if.end32:
> > +  %0 = icmp ult i32 undef, 3
> > +  %1 = zext i64 %tmp.i to i320
> > +  %.pn.v = select i1 %0, i320 128, i320 64
> > +  %.pn = shl i320 %1, %.pn.v
> > +  %ins346392 = or i320 %.pn, 0
> > +  store i320 %ins346392, i320* undef, align 8
> > +  br i1 undef, label %sw.bb.i.i, label %exit
> > +
> > +sw.bb.i.i:
> > +  unreachable
> > +
> > +exit:
> > +  unreachable
> > +
> > +return:
> > +  ret void
> > +}
> >
> > Added:
> llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll
> (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll
> Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,20 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define i32 @foo(<4 x i32> %a, i32 %n) nounwind {
> > +; CHECK-LABEL: foo:
> > +; CHECK: fmov w0, s0
> > +; CHECK-NEXT: ret
> > +  %b = bitcast <4 x i32> %a to i128
> > +  %c = trunc i128 %b to i32
> > +  ret i32 %c
> > +}
> > +
> > +define i64 @bar(<2 x i64> %a, i64 %n) nounwind {
> > +; CHECK-LABEL: bar:
> > +; CHECK: fmov x0, d0
> > +; CHECK-NEXT: ret
> > +  %b = bitcast <2 x i64> %a to i128
> > +  %c = trunc i128 %b to i64
> > +  ret i64 %c
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +; RUN: llc < %s -march arm64 -mcpu=cyclone | FileCheck %s
> > +; <rdar://problem/11294426>
> > +
> > +@b = private unnamed_addr constant [3 x i32] [i32 1768775988, i32
> 1685481784, i32 1836253201], align 4
> > +
> > +; The important thing for this test is that we need an unaligned load
> of `l_b'
> > +; ("ldr w2, [x1, #8]" in this case).
> > +
> > +; CHECK:      adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}}
> > +; CHECK: add  x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}}
> > +; CHECK-NEXT: ldr  [[VAL:w[0-9]+]], [x[[ADDR]], #8]
> > +; CHECK-NEXT: str  [[VAL]], [x0, #8]
> > +; CHECK-NEXT: ldr  [[VAL2:x[0-9]+]], [x[[ADDR]]]
> > +; CHECK-NEXT: str  [[VAL2]], [x0]
> > +
> > +define void @foo(i8* %a) {
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]*
> @b to i8*), i64 12, i32 4, i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1) nounwind
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,22 @@
> > +; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s |
> FileCheck %s --check-prefix=CHECK-LINUX
> > +; <rdar://problem/11392109>
> > +
> > +define hidden void @t() optsize ssp {
> > +entry:
> > +  store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64*
> undef, align 8
> > +; CHECK:             adrp    x{{[0-9]+}}, _x@GOTPAGE
> > +; CHECK:        ldr     x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF]
> > +; CHECK-NEXT:        and     x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff
> > +; CHECK-NEXT:        str     x{{[0-9]+}}, [x{{[0-9]+}}]
> > +  unreachable
> > +}
> > +
> > +declare i64 @x(i32) optsize
> > +
> > +; Worth checking the Linux code is sensible too: the only way to access
> > +; the GOT is via a 64-bit load. Just loading wN is unacceptable
> > +; (there's no ELF relocation to do that).
> > +
> > +; CHECK-LINUX: adrp {{x[0-9]+}}, :got:x
> > +; CHECK-LINUX: ldr {{x[0-9]+}}, [{{x[0-9]+}}, :got_lo12:x]
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,50 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs |
> FileCheck %s
> > +
> > +; LdStOpt bug created illegal instruction:
> > +;   %D1<def>, %D2<def> = LDPSi %X0, 1
> > +; rdar://11512047
> > +
> > +%0 = type opaque
> > +%struct.CGRect = type { %struct.CGPoint, %struct.CGSize }
> > +%struct.CGPoint = type { double, double }
> > +%struct.CGSize = type { double, double }
> > +
> > +@"OBJC_IVAR_$_UIScreen._bounds" = external hidden global i64, section
> "__DATA, __objc_ivar", align 8
> > +
> > +define hidden %struct.CGRect @t(%0* nocapture %self, i8* nocapture
> %_cmd) nounwind readonly optsize ssp {
> > +entry:
> > +; CHECK-LABEL: t:
> > +; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}
> > +  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8,
> !invariant.load !4
> > +  %0 = bitcast %0* %self to i8*
> > +  %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
> > +  %add.ptr10.0 = bitcast i8* %add.ptr to double*
> > +  %tmp11 = load double* %add.ptr10.0, align 8
> > +  %add.ptr.sum = add i64 %ivar, 8
> > +  %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
> > +  %1 = bitcast i8* %add.ptr10.1 to double*
> > +  %tmp12 = load double* %1, align 8
> > +  %add.ptr.sum17 = add i64 %ivar, 16
> > +  %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17
> > +  %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
> > +  %tmp = load double* %add.ptr4.1.0, align 8
> > +  %add.ptr4.1.sum = add i64 %ivar, 24
> > +  %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum
> > +  %2 = bitcast i8* %add.ptr4.1.1 to double*
> > +  %tmp5 = load double* %2, align 8
> > +  %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
> > +  %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
> > +  %insert = insertvalue %struct.CGRect undef, %struct.CGPoint
> %insert16, 0
> > +  %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0
> > +  %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1
> > +  %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize
> %insert9, 1
> > +  ret %struct.CGRect %insert3
> > +}
> > +
> > +!llvm.module.flags = !{!0, !1, !2, !3}
> > +
> > +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
> > +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32
> 0}
> > +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section",
> metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
> > +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32
> 0}
> > +!4 = metadata !{}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,65 @@
> > +; RUN: llc -march=arm64 -O0 < %s | FileCheck %s
> > +; RUN: llc -march=arm64 -O3 < %s | FileCheck %s
> > +
> > +@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
> > +@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
> > +@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1
> > +@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1
> > +
> > +define void @testDouble(double %d) ssp {
> > +; CHECK:  fcvtzu x{{.}}, d{{.}}
> > +; CHECK:  fcvtzu w{{.}}, d{{.}}
> > +entry:
> > +  %d.addr = alloca double, align 8
> > +  store double %d, double* %d.addr, align 8
> > +  %0 = load double* %d.addr, align 8
> > +  %1 = load double* %d.addr, align 8
> > +  %conv = fptoui double %1 to i64
> > +  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x
> i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
> > +  %2 = load double* %d.addr, align 8
> > +  %3 = load double* %d.addr, align 8
> > +  %conv1 = fptoui double %3 to i32
> > +  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8
> x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
> > +  ret void
> > +}
> > +
> > +declare i32 @printf(i8*, ...)
> > +
> > +define void @testFloat(float %f) ssp {
> > +; CHECK:  fcvtzu x{{.}}, s{{.}}
> > +; CHECK:  fcvtzu w{{.}}, s{{.}}
> > +entry:
> > +  %f.addr = alloca float, align 4
> > +  store float %f, float* %f.addr, align 4
> > +  %0 = load float* %f.addr, align 4
> > +  %conv = fpext float %0 to double
> > +  %1 = load float* %f.addr, align 4
> > +  %conv1 = fptoui float %1 to i64
> > +  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x
> i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
> > +  %2 = load float* %f.addr, align 4
> > +  %conv2 = fpext float %2 to double
> > +  %3 = load float* %f.addr, align 4
> > +  %conv3 = fptoui float %3 to i32
> > +  %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7
> x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
> > +  ret void
> > +}
> > +
> > +define i32 @main(i32 %argc, i8** %argv) ssp {
> > +entry:
> > +  %retval = alloca i32, align 4
> > +  %argc.addr = alloca i32, align 4
> > +  %argv.addr = alloca i8**, align 8
> > +  store i32 0, i32* %retval
> > +  store i32 %argc, i32* %argc.addr, align 4
> > +  store i8** %argv, i8*** %argv.addr, align 8
> > +  call void @testDouble(double 1.159198e+01)
> > +  call void @testFloat(float 0x40272F1800000000)
> > +  ret i32 0
> > +}
> > +
> > +!llvm.module.flags = !{!0, !1, !2, !3}
> > +
> > +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
> > +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32
> 0}
> > +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section",
> metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
> > +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32
> 0}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,56 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios
> > +; rdar://11849816
> > +
> > +@shlib_path_substitutions = external hidden unnamed_addr global i8**,
> align 8
> > +
> > +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
> > +
> > +declare noalias i8* @xmalloc(i64) optsize
> > +
> > +declare i64 @strlen(i8* nocapture) nounwind readonly optsize
> > +
> > +declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind optsize
> > +
> > +declare i8* @__strcat_chk(i8*, i8*, i64) nounwind optsize
> > +
> > +declare noalias i8* @xstrdup(i8*) optsize
> > +
> > +define i8* @dyld_fix_path(i8* %path) nounwind optsize ssp {
> > +entry:
> > +  br i1 undef, label %if.end56, label %for.cond
> > +
> > +for.cond:                                         ; preds = %entry
> > +  br i1 undef, label %for.cond10, label %for.body
> > +
> > +for.body:                                         ; preds = %for.cond
> > +  unreachable
> > +
> > +for.cond10:                                       ; preds = %for.cond
> > +  br i1 undef, label %if.end56, label %for.body14
> > +
> > +for.body14:                                       ; preds = %for.cond10
> > +  %call22 = tail call i64 @strlen(i8* undef) nounwind optsize
> > +  %sext = shl i64 %call22, 32
> > +  %conv30 = ashr exact i64 %sext, 32
> > +  %add29 = sub i64 0, %conv30
> > +  %sub = add i64 %add29, 0
> > +  %add31 = shl i64 %sub, 32
> > +  %sext59 = add i64 %add31, 4294967296
> > +  %conv33 = ashr exact i64 %sext59, 32
> > +  %call34 = tail call noalias i8* @xmalloc(i64 %conv33) nounwind optsize
> > +  br i1 undef, label %cond.false45, label %cond.true43
> > +
> > +cond.true43:                                      ; preds = %for.body14
> > +  unreachable
> > +
> > +cond.false45:                                     ; preds = %for.body14
> > +  %add.ptr = getelementptr inbounds i8* %path, i64 %conv30
> > +  unreachable
> > +
> > +if.end56:                                         ; preds =
> %for.cond10, %entry
> > +  ret i8* null
> > +}
> > +
> > +declare i32 @strncmp(i8* nocapture, i8* nocapture, i64) nounwind
> readonly optsize
> > +
> > +declare i8* @strcpy(i8*, i8* nocapture) nounwind
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,19 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast
> | FileCheck %s --check-prefix=FAST
> > +
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
> > +target triple = "arm64-apple-ios7.0.0"
> > +
> > +;FAST-LABEL: _Z9example25v:
> > +;FAST: fcmgt.4s
> > +;FAST: ret
> > +
> > +;CHECK-LABEL: _Z9example25v:
> > +;CHECK: fcmgt.4s
> > +;CHECK: ret
> > +
> > +define <4 x i32> @_Z9example25v( <4 x float> %N0,  <4 x float> %N1) {
> > +  %A = fcmp olt <4 x float> %N0, %N1
> > +  %B = zext <4 x i1> %A to <4 x i32>
> > +  ret <4 x i32> %B
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,15 @@
> > +; RUN: llc < %s -march=arm64
> > +; Make sure we are not crashing on this test.
> > +
> > +define void @autogen_SD13158() {
> > +entry:
> > +  %B26 = frem float 0.000000e+00, undef
> > +  br i1 undef, label %CF, label %CF77
> > +
> > +CF:                                               ; preds = %CF, %CF76
> > +  store float %B26, float* undef
> > +  br i1 undef, label %CF, label %CF77
> > +
> > +CF77:                                             ; preds = %CF
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,37 @@
> > +; RUN: llc < %s -march=arm64
> > +
> > +; Make sure we are not crashing on this test.
> > +
> > +define void @autogen_SD12881() {
> > +BB:
> > +  %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer
> > +  br label %CF
> > +
> > +CF:                                               ; preds = %CF83, %CF,
> %BB
> > +  br i1 undef, label %CF, label %CF83
> > +
> > +CF83:                                             ; preds = %CF
> > +  %FC70 = sitofp <4 x i32> %B17 to <4 x double>
> > +  br label %CF
> > +}
> > +
> > +
> > +define void @autogen_SD12881_2() {
> > +BB:
> > +  %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer
> > +  br label %CF
> > +
> > +CF:                                               ; preds = %CF83, %CF,
> %BB
> > +  br i1 undef, label %CF, label %CF83
> > +
> > +CF83:                                             ; preds = %CF
> > +  %FC70 = uitofp <4 x i32> %B17 to <4 x double>
> > +  br label %CF
> > +}
> > +
> > +define void @_Z12my_example2bv() nounwind noinline ssp {
> > +entry:
> > +  %0 = fptosi <2 x double> undef to <2 x i32>
> > +  store <2 x i32> %0, <2 x i32>* undef, align 8
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> > +
> > +;CHECK-LABEL: Shuff:
> > +;CHECK: tbl.8b
> > +;CHECK: ret
> > +define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp {
> > +  %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
> > +  ret <8 x i8> %value
> > +}
> > +
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,38 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s
> > +;
> > +define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
> > +; CHECK-LABEL: bar:
> > +; CHECK: add.2d        v[[REG:[0-9]+]], v0, v1
> > +; CHECK: add   d[[REG3:[0-9]+]], d[[REG]], d1
> > +; CHECK: sub   d[[REG2:[0-9]+]], d[[REG]], d1
> > +  %add = add <2 x i64> %a, %b
> > +  %vgetq_lane = extractelement <2 x i64> %add, i32 0
> > +  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
> > +  %add3 = add i64 %vgetq_lane, %vgetq_lane2
> > +  %sub = sub i64 %vgetq_lane, %vgetq_lane2
> > +  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
> > +  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
> > +  ret <2 x i64> %vecinit8
> > +}
> > +
> > +define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone
> {
> > +; CHECK-LABEL: subdd_su64:
> > +; CHECK: sub d0, d1, d0
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <2 x i64> %a, i32 0
> > +  %vecext1 = extractelement <2 x i64> %b, i32 0
> > +  %sub.i = sub nsw i64 %vecext1, %vecext
> > +  %retval = bitcast i64 %sub.i to double
> > +  ret double %retval
> > +}
> > +
> > +define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone
> {
> > +; CHECK-LABEL: vaddd_su64:
> > +; CHECK: add d0, d1, d0
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <2 x i64> %a, i32 0
> > +  %vecext1 = extractelement <2 x i64> %b, i32 0
> > +  %add.i = add nsw i64 %vecext1, %vecext
> > +  %retval = bitcast i64 %add.i to double
> > +  ret double %retval
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/aapcs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aapcs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/aapcs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/aapcs.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,86 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s |
> FileCheck %s
> > +
> > + at var = global i32 0, align 4
> > +
> > +define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
> > +  store i32 %after, i32* @var, align 4
> > +; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
> > +
> > +  ret i128 %arg
> > +; CHECK: mov x0, x2
> > +; CHECK: mov x1, x3
> > +}
> > +
> > + at var64 = global i64 0, align 8
> > +
> > +  ; Check stack slots are 64-bit at all times.
> > +define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
> > +                                i32 %int, i64 %long) {
> > +  ; Part of last store. Blasted scheduler.
> > +; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
> > +
> > +  %ext_bool = zext i1 %bool to i64
> > +  store volatile i64 %ext_bool, i64* @var64, align 8
> > +; CHECK: ldr w[[EXT:[0-9]+]], [sp]
> > +; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
> > +; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_char = zext i8 %char to i64
> > +  store volatile i64 %ext_char, i64* @var64, align 8
> > +; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
> > +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_short = zext i16 %short to i64
> > +  store volatile i64 %ext_short, i64* @var64, align 8
> > +; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
> > +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_int = zext i32 %int to i64
> > +  store volatile i64 %ext_int, i64* @var64, align 8
> > +; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
> > +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  store volatile i64 %long, i64* @var64, align 8
> > +; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  ret void
> > +}
> > +
> > +; Make sure the callee performs the extensions (in the absence of zext/sext
> > +; keywords on the arguments) while we're here.
> > +
> > +define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
> > +  %ext_bool = zext i1 %bool to i64
> > +  store volatile i64 %ext_bool, i64* @var64
> > +; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
> > +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_char = sext i8 %char to i64
> > +  store volatile i64 %ext_char, i64* @var64
> > +; CHECK: sxtb [[EXT:x[0-9]+]], x1
> > +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_short = zext i16 %short to i64
> > +  store volatile i64 %ext_short, i64* @var64
> > +; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
> > +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  %ext_int = zext i32 %int to i64
> > +  store volatile i64 %ext_int, i64* @var64
> > +; CHECK: uxtw [[EXT:x[0-9]+]], x3
> > +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
> > +
> > +  ret void
> > +}
> > +
> > +declare void @variadic(i32 %a, ...)
> > +
> > +  ; Under AAPCS, variadic functions have the same calling convention as
> > +  ; other functions. The extra arguments should go in registers rather
> > +  ; than on the stack.
> > +define void @test_variadic() {
> > +  call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
> > +; CHECK: fmov d0, #2.0
> > +; CHECK: orr x1, xzr, #0x1
> > +; CHECK: bl variadic
> > +  ret void
> > +}
> >
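
As a side note for anyone reading the aapcs.ll test above: it pins down three
AAPCS properties -- stack-passed arguments always occupy 64-bit slots, the
callee widens narrow integer arguments when the IR carries no zext/sext
attributes, and variadic calls follow the same convention as ordinary calls.
A minimal C sketch of the same shapes (the function names below are purely
illustrative, not taken from the patch or from arm64-arguments.c):

  #include <stdarg.h>
  #include <stdio.h>

  /* Eight register arguments, then a char and a short that each land in
     their own 64-bit stack slot under AAPCS; the callee does the widening. */
  static long use_narrow(long a, long b, long c, long d,
                         long e, long f, long g, long h,
                         char narrow1, short narrow2) {
    return a + b + c + d + e + f + g + h + narrow1 + narrow2;
  }

  /* Variadic call: the extra double travels the same way a fixed double would. */
  static double sum_variadic(int count, ...) {
    va_list ap;
    double total = 0.0;
    va_start(ap, count);
    for (int i = 0; i < count; ++i)
      total += va_arg(ap, double);
    va_end(ap);
    return total;
  }

  int main(void) {
    printf("%ld\n", use_narrow(1, 2, 3, 4, 5, 6, 7, 8, (char)9, (short)10));
    printf("%f\n", sum_variadic(2, 1.0, 2.0));
    return 0;
  }

Building something like this with clang targeting arm64-linux-gnu should show
load/store patterns similar to the CHECK lines above, though the exact
scheduling will differ.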
> > Added: llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,191 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false |
> FileCheck %s
> > +target triple = "arm64-apple-ios7.0.0"
> > +
> > +; rdar://13625505
> > +; Here we have 9 fixed integer arguments; the 9th argument is on the stack,
> > +; and the varargs start right after it, at 8-byte alignment.
> > +define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
> i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
> > +; CHECK-LABEL: fn9:
> > +; 9th fixed argument
> > +; CHECK: ldr {{w[0-9]+}}, [sp, #64]
> > +; CHECK: add [[ARGS:x[0-9]+]], sp, #72
> > +; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
> > +; First vararg
> > +; CHECK: ldr {{w[0-9]+}}, [sp, #72]
> > +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
> > +; Second vararg
> > +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
> > +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
> > +; Third vararg
> > +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
> > +  %1 = alloca i32, align 4
> > +  %2 = alloca i32, align 4
> > +  %3 = alloca i32, align 4
> > +  %4 = alloca i32, align 4
> > +  %5 = alloca i32, align 4
> > +  %6 = alloca i32, align 4
> > +  %7 = alloca i32, align 4
> > +  %8 = alloca i32, align 4
> > +  %9 = alloca i32, align 4
> > +  %args = alloca i8*, align 8
> > +  %a10 = alloca i32, align 4
> > +  %a11 = alloca i32, align 4
> > +  %a12 = alloca i32, align 4
> > +  store i32 %a1, i32* %1, align 4
> > +  store i32 %a2, i32* %2, align 4
> > +  store i32 %a3, i32* %3, align 4
> > +  store i32 %a4, i32* %4, align 4
> > +  store i32 %a5, i32* %5, align 4
> > +  store i32 %a6, i32* %6, align 4
> > +  store i32 %a7, i32* %7, align 4
> > +  store i32 %a8, i32* %8, align 4
> > +  store i32 %a9, i32* %9, align 4
> > +  %10 = bitcast i8** %args to i8*
> > +  call void @llvm.va_start(i8* %10)
> > +  %11 = va_arg i8** %args, i32
> > +  store i32 %11, i32* %a10, align 4
> > +  %12 = va_arg i8** %args, i32
> > +  store i32 %12, i32* %a11, align 4
> > +  %13 = va_arg i8** %args, i32
> > +  store i32 %13, i32* %a12, align 4
> > +  ret void
> > +}
> > +
> > +declare void @llvm.va_start(i8*) nounwind
> > +
> > +define i32 @main() nounwind ssp {
> > +; CHECK-LABEL: main:
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
> > +; CHECK: str {{x[0-9]+}}, [sp, #8]
> > +; CHECK: str {{w[0-9]+}}, [sp]
> > +  %a1 = alloca i32, align 4
> > +  %a2 = alloca i32, align 4
> > +  %a3 = alloca i32, align 4
> > +  %a4 = alloca i32, align 4
> > +  %a5 = alloca i32, align 4
> > +  %a6 = alloca i32, align 4
> > +  %a7 = alloca i32, align 4
> > +  %a8 = alloca i32, align 4
> > +  %a9 = alloca i32, align 4
> > +  %a10 = alloca i32, align 4
> > +  %a11 = alloca i32, align 4
> > +  %a12 = alloca i32, align 4
> > +  store i32 1, i32* %a1, align 4
> > +  store i32 2, i32* %a2, align 4
> > +  store i32 3, i32* %a3, align 4
> > +  store i32 4, i32* %a4, align 4
> > +  store i32 5, i32* %a5, align 4
> > +  store i32 6, i32* %a6, align 4
> > +  store i32 7, i32* %a7, align 4
> > +  store i32 8, i32* %a8, align 4
> > +  store i32 9, i32* %a9, align 4
> > +  store i32 10, i32* %a10, align 4
> > +  store i32 11, i32* %a11, align 4
> > +  store i32 12, i32* %a12, align 4
> > +  %1 = load i32* %a1, align 4
> > +  %2 = load i32* %a2, align 4
> > +  %3 = load i32* %a3, align 4
> > +  %4 = load i32* %a4, align 4
> > +  %5 = load i32* %a5, align 4
> > +  %6 = load i32* %a6, align 4
> > +  %7 = load i32* %a7, align 4
> > +  %8 = load i32* %a8, align 4
> > +  %9 = load i32* %a9, align 4
> > +  %10 = load i32* %a10, align 4
> > +  %11 = load i32* %a11, align 4
> > +  %12 = load i32* %a12, align 4
> > +  call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)*
> @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32
> %9, i32 %10, i32 %11, i32 %12)
> > +  ret i32 0
> > +}
> > +
> > +;rdar://13668483
> > + at .str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1
> > +define void @foo(i8* %fmt, ...) nounwind {
> > +entry:
> > +; CHECK-LABEL: foo:
> > +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8
> > +; CHECK: ldr {{w[0-9]+}}, [sp, #48]
> > +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15
> > +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
> > +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]]
> > +  %fmt.addr = alloca i8*, align 8
> > +  %args = alloca i8*, align 8
> > +  %vc = alloca i32, align 4
> > +  %vv = alloca <4 x i32>, align 16
> > +  store i8* %fmt, i8** %fmt.addr, align 8
> > +  %args1 = bitcast i8** %args to i8*
> > +  call void @llvm.va_start(i8* %args1)
> > +  %0 = va_arg i8** %args, i32
> > +  store i32 %0, i32* %vc, align 4
> > +  %1 = va_arg i8** %args, <4 x i32>
> > +  store <4 x i32> %1, <4 x i32>* %vv, align 16
> > +  ret void
> > +}
> > +
> > +define void @bar(i32 %x, <4 x i32> %y) nounwind {
> > +entry:
> > +; CHECK-LABEL: bar:
> > +; CHECK: str {{q[0-9]+}}, [sp, #16]
> > +; CHECK: str {{x[0-9]+}}, [sp]
> > +  %x.addr = alloca i32, align 4
> > +  %y.addr = alloca <4 x i32>, align 16
> > +  store i32 %x, i32* %x.addr, align 4
> > +  store <4 x i32> %y, <4 x i32>* %y.addr, align 16
> > +  %0 = load i32* %x.addr, align 4
> > +  %1 = load <4 x i32>* %y.addr, align 16
> > +  call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]*
> @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
> > +  ret void
> > +}
> > +
> > +; rdar://13668927
> > +; When passing 16-byte-aligned small structs as varargs, make sure the
> > +; caller side keeps them 16-byte aligned on the stack.
> > +%struct.s41 = type { i32, i16, i32, i16 }
> > +define void @foo2(i8* %fmt, ...) nounwind {
> > +entry:
> > +; CHECK-LABEL: foo2:
> > +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8
> > +; CHECK: ldr {{w[0-9]+}}, [sp, #48]
> > +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15
> > +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
> > +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]]
> > +  %fmt.addr = alloca i8*, align 8
> > +  %args = alloca i8*, align 8
> > +  %vc = alloca i32, align 4
> > +  %vs = alloca %struct.s41, align 16
> > +  store i8* %fmt, i8** %fmt.addr, align 8
> > +  %args1 = bitcast i8** %args to i8*
> > +  call void @llvm.va_start(i8* %args1)
> > +  %0 = va_arg i8** %args, i32
> > +  store i32 %0, i32* %vc, align 4
> > +  %ap.cur = load i8** %args
> > +  %1 = getelementptr i8* %ap.cur, i32 15
> > +  %2 = ptrtoint i8* %1 to i64
> > +  %3 = and i64 %2, -16
> > +  %ap.align = inttoptr i64 %3 to i8*
> > +  %ap.next = getelementptr i8* %ap.align, i32 16
> > +  store i8* %ap.next, i8** %args
> > +  %4 = bitcast i8* %ap.align to %struct.s41*
> > +  %5 = bitcast %struct.s41* %vs to i8*
> > +  %6 = bitcast %struct.s41* %4 to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16,
> i1 false)
> > +  ret void
> > +}
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1) nounwind
> > +
> > +define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
> > +entry:
> > +; CHECK-LABEL: bar2:
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
> > +; CHECK: str {{x[0-9]+}}, [sp]
> > +  %x.addr = alloca i32, align 4
> > +  %s41 = alloca %struct.s41, align 16
> > +  store i32 %x, i32* %x.addr, align 4
> > +  %0 = bitcast %struct.s41* %s41 to i128*
> > +  store i128 %s41.coerce, i128* %0, align 1
> > +  %1 = load i32* %x.addr, align 4
> > +  %2 = bitcast %struct.s41* %s41 to i128*
> > +  %3 = load i128* %2, align 1
> > +  call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]*
> @.str, i32 0, i32 0), i32 %1, i128 %3)
> > +  ret void
> > +}
> >
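
The interesting part of abi-varargs.ll is the foo2/bar2 pair: when a small
struct with 16-byte alignment is passed as a vararg, the callee rounds its
va_list cursor up to a 16-byte boundary (the "and ..., #0xfffffffffffffff0"
in the CHECK lines), and the caller keeps the copy 16-byte aligned on the
stack. A minimal C sketch of that pattern; the struct layout and names are
chosen purely for illustration:

  #include <stdarg.h>
  #include <stdio.h>

  /* A small struct forced to 16-byte alignment, in the spirit of %struct.s41. */
  struct s41 { int i; short s; int i2; short s2; } __attribute__((aligned(16)));

  /* The callee must align the va_list cursor to 16 before reading the struct. */
  static int sum_vararg_struct(const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    int count = va_arg(ap, int);          /* a plain int vararg first */
    struct s41 v = va_arg(ap, struct s41); /* then the 16-byte-aligned struct */
    va_end(ap);
    return count + v.i + v.i2;
  }

  int main(void) {
    struct s41 v = { 1, 2, 3, 4 };
    printf("%d\n", sum_vararg_struct("fmt", 5, v));
    return 0;
  }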
> > Added: llvm/trunk/test/CodeGen/ARM64/abi.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/abi.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/abi.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,236 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false |
> FileCheck %s
> > +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
> > +target triple = "arm64-apple-darwin"
> > +
> > +; rdar://9932559
> > +define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16
> signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8
> signext %b3, i8 signext %b4) nounwind readnone noinline {
> > +entry:
> > +; CHECK-LABEL: i8i16callee:
> > +; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
> > +; They are i8, i16, i8 and i8.
> > +; CHECK: ldrsb {{w[0-9]+}}, [sp, #5]
> > +; CHECK: ldrsh {{w[0-9]+}}, [sp, #2]
> > +; CHECK: ldrsb {{w[0-9]+}}, [sp]
> > +; CHECK: ldrsb {{w[0-9]+}}, [sp, #4]
> > +; FAST-LABEL: i8i16callee:
> > +; FAST: ldrb  {{w[0-9]+}}, [sp, #5]
> > +; FAST: ldrb  {{w[0-9]+}}, [sp, #4]
> > +; FAST: ldrh  {{w[0-9]+}}, [sp, #2]
> > +; FAST: ldrb  {{w[0-9]+}}, [sp]
> > +  %conv = sext i8 %a4 to i64
> > +  %conv3 = sext i16 %a5 to i64
> > +  %conv8 = sext i8 %b1 to i64
> > +  %conv9 = sext i16 %b2 to i64
> > +  %conv11 = sext i8 %b3 to i64
> > +  %conv13 = sext i8 %b4 to i64
> > +  %add10 = add i64 %a2, %a1
> > +  %add12 = add i64 %add10, %a3
> > +  %add14 = add i64 %add12, %conv
> > +  %add = add i64 %add14, %conv3
> > +  %add1 = add i64 %add, %a6
> > +  %add2 = add i64 %add1, %a7
> > +  %add4 = add i64 %add2, %a8
> > +  %add5 = add i64 %add4, %conv8
> > +  %add6 = add i64 %add5, %conv9
> > +  %add7 = add i64 %add6, %conv11
> > +  %add15 = add i64 %add7, %conv13
> > +  %sext = shl i64 %add15, 32
> > +  %conv17 = ashr exact i64 %sext, 32
> > +  ret i64 %conv17
> > +}
> > +
> > +define i32 @i8i16caller() nounwind readnone {
> > +entry:
> > +; CHECK: i8i16caller
> > +; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
> > +; They are i8, i16, i8 and i8.
> > +; CHECK: strb {{w[0-9]+}}, [sp, #5]
> > +; CHECK: strb {{w[0-9]+}}, [sp, #4]
> > +; CHECK: strh {{w[0-9]+}}, [sp, #2]
> > +; CHECK: strb {{w[0-9]+}}, [sp]
> > +; CHECK: bl
> > +; FAST: i8i16caller
> > +; FAST: strb {{w[0-9]+}}, [sp]
> > +; FAST: strh {{w[0-9]+}}, [sp, #2]
> > +; FAST: strb {{w[0-9]+}}, [sp, #4]
> > +; FAST: strb {{w[0-9]+}}, [sp, #5]
> > +; FAST: bl
> > +  %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3,
> i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8
> signext 99, i8 signext 100)
> > +  %conv = trunc i64 %call to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; rdar://12651543
> > +define double @circle_center([2 x float] %a) nounwind ssp {
> > +  %call = tail call double @ext([2 x float] %a) nounwind
> > +; CHECK: circle_center
> > +; CHECK: bl
> > +  ret double %call
> > +}
> > +declare double @ext([2 x float])
> > +
> > +; rdar://12656141
> > +; A 16-byte vector should be 16-byte aligned when passed on the stack.
> > +; A double argument will be passed on the stack, so the vector should be
> > +; at sp+16.
> > +define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: fixed_4i
> > +; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
> > +; FAST: fixed_4i
> > +; FAST: mov x[[ADDR:[0-9]+]], sp
> > +; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
> > +  %0 = load <4 x i32>* %in, align 16
> > +  %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32>
> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0,
> <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
> > +  ret double %call
> > +}
> > +declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4
> x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext)
> > +
> > +; rdar://12695237
> > +; d8 at sp, i in register w0.
> > + at g_d = common global double 0.000000e+00, align 8
> > +define void @test1(float %f1, double %d1, double %d2, double %d3,
> double %d4,
> > +       double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind
> ssp {
> > +entry:
> > +; CHECK: test1
> > +; CHECK: ldr [[REG_1:d[0-9]+]], [sp]
> > +; CHECK: scvtf [[REG_2:s[0-9]+]], w0
> > +; CHECK: fadd s0, [[REG_2]], s0
> > +  %conv = sitofp i32 %i to float
> > +  %add = fadd float %conv, %f1
> > +  %conv1 = fpext float %add to double
> > +  %add2 = fadd double %conv1, %d7
> > +  %add3 = fadd double %add2, %d8
> > +  store double %add3, double* @g_d, align 8
> > +  ret void
> > +}
> > +
> > +; i9 at sp, d1 in register s0.
> > +define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
> > +            i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp {
> > +entry:
> > +; CHECK: test2
> > +; CHECK: scvtf [[REG_2:s[0-9]+]], w0
> > +; CHECK: fadd s0, [[REG_2]], s0
> > +; CHECK: ldr [[REG_1:s[0-9]+]], [sp]
> > +  %conv = sitofp i32 %i1 to float
> > +  %add = fadd float %conv, %d1
> > +  %conv1 = fpext float %add to double
> > +  %conv2 = sitofp i32 %i8 to double
> > +  %add3 = fadd double %conv2, %conv1
> > +  %conv4 = sitofp i32 %i9 to double
> > +  %add5 = fadd double %conv4, %add3
> > +  store double %add5, double* @g_d, align 8
> > +  ret void
> > +}
> > +
> > +; rdar://12648441
> > +; Check alignment on stack for v64, f64, i64, f32, i32.
> > +define double @test3(<2 x i32>* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: test3
> > +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
> > +; FAST: test3
> > +; FAST: mov x[[ADDR:[0-9]+]], sp
> > +; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
> > +  %0 = load <2 x i32>* %in, align 8
> > +  %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32>
> %0,
> > +          <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x
> i32> %0,
> > +          <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
> > +  ret double %call
> > +}
> > +declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2
> x i32>,
> > +               <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8
> signext)
> > +
> > +define double @test4(double* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: test4
> > +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
> > +; CHECK: str [[REG_2:w[0-9]+]], [sp]
> > +; CHECK: orr w0, wzr, #0x3
> > +  %0 = load double* %in, align 8
> > +  %call = tail call double @args_f64(double 3.000000e+00, double %0,
> double %0,
> > +          double %0, double %0, double %0, double %0, double %0,
> > +          float 3.000000e+00, double %0, i8 signext 3)
> > +  ret double %call
> > +}
> > +declare double @args_f64(double, double, double, double, double,
> double, double,
> > +               double, float, double, i8 signext)
> > +
> > +define i64 @test5(i64* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: test5
> > +; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
> > +; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
> > +; CHECK: str [[REG_2:w[0-9]+]], [sp]
> > +  %0 = load i64* %in, align 8
> > +  %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64
> %0, i64 %0,
> > +                         i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
> > +  ret i64 %call
> > +}
> > +declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64,
> > +             i8 signext)
> > +
> > +define i32 @test6(float* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: test6
> > +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
> > +; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
> > +; CHECK: strh [[REG_3:w[0-9]+]], [sp]
> > +  %0 = load float* %in, align 4
> > +  %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6,
> > +          i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0,
> float 5.0,
> > +          float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
> > +          i8 signext 3)
> > +  ret i32 %call
> > +}
> > +declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32,
> > +                      float, float, float, float, float, float, float,
> float,
> > +                      i16 signext, float, i8 signext)
> > +
> > +define i32 @test7(i32* nocapture %in) nounwind {
> > +entry:
> > +; CHECK: test7
> > +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
> > +; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
> > +; CHECK: strh [[REG_3:w[0-9]+]], [sp]
> > +  %0 = load i32* %in, align 4
> > +  %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32
> %0, i32 %0,
> > +                         i32 %0, i32 %0, i16 signext 3, i32 %0, i8
> signext 4)
> > +  ret i32 %call
> > +}
> > +declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16
> signext, i32,
> > +             i8 signext)
> > +
> > +define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
> > +entry:
> > +; CHECK: test8
> > +; CHECK: strb {{w[0-9]+}}, [sp, #3]
> > +; CHECK: strb wzr, [sp, #2]
> > +; CHECK: strb {{w[0-9]+}}, [sp, #1]
> > +; CHECK: strb wzr, [sp]
> > +; CHECK: bl
> > +; FAST: test8
> > +; FAST: strb {{w[0-9]+}}, [sp]
> > +; FAST: strb {{w[0-9]+}}, [sp, #1]
> > +; FAST: strb {{w[0-9]+}}, [sp, #2]
> > +; FAST: strb {{w[0-9]+}}, [sp, #3]
> > +; FAST: bl
> > +  tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext
> false,
> > +                  i1 zeroext true, i1 zeroext false, i1 zeroext true,
> > +                  i1 zeroext false, i1 zeroext true, i1 zeroext false,
> > +                  i1 zeroext true, i1 zeroext false, i1 zeroext true)
> > +  ret i32 0
> > +}
> > +
> > +declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
> > +                      i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
> > +                      i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext)
> > +
> > +define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
> i64 %f,
> > +                               i64 %g, i64 %h, i64 %i, i1 zeroext %j) {
> > +; CHECK-LABEL: i1_stack_incoming:
> > +; CHECK: ldrb w0, [sp, #8]
> > +; CHECK: ret
> > +  %v = zext i1 %j to i32
> > +  ret i32 %v
> > +}
> >
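
For reference, abi.ll is checking the Darwin flavour of the ABI rather than
AAPCS: small integer arguments that spill to the stack are packed at their
natural size and alignment (i8 at sp, i16 at sp+2, and so on in i8i16callee)
instead of each taking an 8-byte slot. A rough C-level shape of that callee;
the names are illustrative, not the actual arm64-arguments.c source:

  #include <stdio.h>

  /* The first eight parameters go in registers; the trailing char/short/char/char
     end up packed on the stack at sp, sp+2, sp+4 and sp+5 on Darwin arm64. */
  static long callee(long a1, long a2, long a3, signed char a4, short a5,
                     long a6, long a7, long a8,
                     signed char b1, short b2, signed char b3, signed char b4) {
    return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + b1 + b2 + b3 + b4;
  }

  int main(void) {
    printf("%ld\n", callee(0, 1, 2, 3, 4, 5, 6, 7, 97, 98, 99, 100));
    return 0;
  }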
> > Added: llvm/trunk/test/CodeGen/ARM64/abi_align.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi_align.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/abi_align.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/abi_align.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,529 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false |
> FileCheck %s
> > +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
> > +target triple = "arm64-apple-darwin"
> > +
> > +; rdar://12648441
> > +; Generated from arm64-arguments.c with -O2.
> > +; Test passing structs with size < 8, < 16 and > 16
> > +; with alignment of 16 and without
> > +
> > +; Structs with size < 8
> > +%struct.s38 = type { i32, i16 }
> > +; With alignment of 16, the size will be padded to a multiple of 16 bytes.
> > +%struct.s39 = type { i32, i16, [10 x i8] }
> > +; Structs with size < 16
> > +%struct.s40 = type { i32, i16, i32, i16 }
> > +%struct.s41 = type { i32, i16, i32, i16 }
> > +; Structs with size > 16
> > +%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
> > +%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
> > +
> > + at g38 = common global %struct.s38 zeroinitializer, align 4
> > + at g38_2 = common global %struct.s38 zeroinitializer, align 4
> > + at g39 = common global %struct.s39 zeroinitializer, align 16
> > + at g39_2 = common global %struct.s39 zeroinitializer, align 16
> > + at g40 = common global %struct.s40 zeroinitializer, align 4
> > + at g40_2 = common global %struct.s40 zeroinitializer, align 4
> > + at g41 = common global %struct.s41 zeroinitializer, align 16
> > + at g41_2 = common global %struct.s41 zeroinitializer, align 16
> > + at g42 = common global %struct.s42 zeroinitializer, align 4
> > + at g42_2 = common global %struct.s42 zeroinitializer, align 4
> > + at g43 = common global %struct.s43 zeroinitializer, align 16
> > + at g43_2 = common global %struct.s43 zeroinitializer, align 16
> > +
> > +; structs with size < 8 bytes, passed via i64 in x1 and x2
> > +define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
> > +entry:
> > +; CHECK: f38
> > +; CHECK: add w[[A:[0-9]+]], w1, w0
> > +; CHECK: add {{w[0-9]+}}, w[[A]], w2
> > +  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
> > +  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
> > +  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
> > +  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
> > +  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
> > +  %sext = trunc i64 %sext8 to i32
> > +  %conv = ashr exact i32 %sext, 16
> > +  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
> > +  %sext10 = trunc i64 %sext1011 to i32
> > +  %conv6 = ashr exact i32 %sext10, 16
> > +  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
> > +  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +define i32 @caller38() #1 {
> > +entry:
> > +; CHECK: caller38
> > +; CHECK: ldr x1,
> > +; CHECK: ldr x2,
> > +  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
> > +  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
> > +  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64
> %s2.coerce) #0
> > +
> > +; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
> > +; i9 at [sp]
> > +define i32 @caller38_stack() #1 {
> > +entry:
> > +; CHECK: caller38_stack
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
> > +  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
> > +  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6,
> > +                                   i32 7, i32 8, i32 9, i64 %0, i64 %1)
> #5
> > +  ret i32 %call
> > +}
> > +
> > +; structs with size < 8 bytes, alignment of 16
> > +; passed via i128 in x1 and x3
> > +define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
> > +entry:
> > +; CHECK: f39
> > +; CHECK: add w[[A:[0-9]+]], w1, w0
> > +; CHECK: add {{w[0-9]+}}, w[[A]], w3
> > +  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
> > +  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
> > +  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
> > +  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
> > +  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
> > +  %sext = trunc i128 %sext8 to i32
> > +  %conv = ashr exact i32 %sext, 16
> > +  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
> > +  %sext10 = trunc i128 %sext1011 to i32
> > +  %conv6 = ashr exact i32 %sext10, 16
> > +  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
> > +  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +define i32 @caller39() #1 {
> > +entry:
> > +; CHECK: caller39
> > +; CHECK: ldp x1, x2,
> > +; CHECK: ldp x3, x4,
> > +  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
> > +  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
> > +  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128
> %s2.coerce) #0
> > +
> > +; structs with size < 8 bytes, alignment 16
> > +; passed on stack at [sp+16] and [sp+32]
> > +define i32 @caller39_stack() #1 {
> > +entry:
> > +; CHECK: caller39_stack
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
> > +  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
> > +  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6,
> > +                                   i32 7, i32 8, i32 9, i128 %0, i128
> %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +; structs with size < 16 bytes
> > +; passed via i128 in x1 and x3
> > +define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
> > +entry:
> > +; CHECK: f40
> > +; CHECK: add w[[A:[0-9]+]], w1, w0
> > +; CHECK: add {{w[0-9]+}}, w[[A]], w3
> > +  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
> > +  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
> > +  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
> > +  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
> > +  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
> > +  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
> > +  %sext = trunc i64 %sext8 to i32
> > +  %conv = ashr exact i32 %sext, 16
> > +  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
> > +  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
> > +  %sext10 = trunc i64 %sext1011 to i32
> > +  %conv6 = ashr exact i32 %sext10, 16
> > +  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
> > +  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +define i32 @caller40() #1 {
> > +entry:
> > +; CHECK: caller40
> > +; CHECK: ldp x1, x2,
> > +; CHECK: ldp x3, x4,
> > +  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align
> 4
> > +  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*),
> align 4
> > +  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x
> i64] %s2.coerce) #0
> > +
> > +; structs with size < 16 bytes
> > +; passed on stack at [sp+8] and [sp+24]
> > +define i32 @caller40_stack() #1 {
> > +entry:
> > +; CHECK: caller40_stack
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align
> 4
> > +  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*),
> align 4
> > +  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6,
> > +                         i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64]
> %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +; structs with size < 16 bytes, alignment of 16
> > +; passed via i128 in x1 and x3
> > +define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
> > +entry:
> > +; CHECK: f41
> > +; CHECK: add w[[A:[0-9]+]], w1, w0
> > +; CHECK: add {{w[0-9]+}}, w[[A]], w3
> > +  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
> > +  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
> > +  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
> > +  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
> > +  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
> > +  %sext = trunc i128 %sext8 to i32
> > +  %conv = ashr exact i32 %sext, 16
> > +  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
> > +  %sext10 = trunc i128 %sext1011 to i32
> > +  %conv6 = ashr exact i32 %sext10, 16
> > +  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
> > +  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +define i32 @caller41() #1 {
> > +entry:
> > +; CHECK: caller41
> > +; CHECK: ldp x1, x2,
> > +; CHECK: ldp x3, x4,
> > +  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
> > +  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
> > +  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128
> %s2.coerce) #0
> > +
> > +; structs with size < 16 bytes, alignment of 16
> > +; passed on stack at [sp+16] and [sp+32]
> > +define i32 @caller41_stack() #1 {
> > +entry:
> > +; CHECK: caller41_stack
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
> > +  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
> > +  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6,
> > +                            i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
> > +  ret i32 %call
> > +}
> > +
> > +; structs with size of 22 bytes, passed indirectly in x1 and x2
> > +define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42*
> nocapture %s2) #2 {
> > +entry:
> > +; CHECK: f42
> > +; CHECK: ldr w[[A:[0-9]+]], [x1]
> > +; CHECK: ldr w[[B:[0-9]+]], [x2]
> > +; CHECK: add w[[C:[0-9]+]], w[[A]], w0
> > +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
> > +; FAST: f42
> > +; FAST: ldr w[[A:[0-9]+]], [x1]
> > +; FAST: ldr w[[B:[0-9]+]], [x2]
> > +; FAST: add w[[C:[0-9]+]], w[[A]], w0
> > +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
> > +  %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
> > +  %0 = load i32* %i1, align 4, !tbaa !0
> > +  %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
> > +  %1 = load i32* %i2, align 4, !tbaa !0
> > +  %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
> > +  %2 = load i16* %s, align 2, !tbaa !3
> > +  %conv = sext i16 %2 to i32
> > +  %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
> > +  %3 = load i16* %s5, align 2, !tbaa !3
> > +  %conv6 = sext i16 %3 to i32
> > +  %add = add i32 %0, %i
> > +  %add3 = add i32 %add, %1
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +; For s1, we allocate a 22-byte stack slot and pass its address via x1.
> > +define i32 @caller42() #3 {
> > +entry:
> > +; CHECK: caller42
> > +; CHECK: str {{x[0-9]+}}, [sp, #48]
> > +; CHECK: str {{q[0-9]+}}, [sp, #32]
> > +; CHECK: str {{x[0-9]+}}, [sp, #16]
> > +; CHECK: str {{q[0-9]+}}, [sp]
> > +; CHECK: add x1, sp, #32
> > +; CHECK: mov x2, sp
> > +; Space for s1 is allocated at sp+32
> > +; Space for s2 is allocated at sp
> > +
> > +; FAST: caller42
> > +; FAST: sub sp, sp, #96
> > +; Space for s1 is allocated at fp-24 = sp+72
> > +; Space for s2 is allocated at sp+48
> > +; FAST: sub x[[A:[0-9]+]], fp, #24
> > +; FAST: add x[[A:[0-9]+]], sp, #48
> > +; Call memcpy with size = 24 (0x18)
> > +; FAST: orr {{x[0-9]+}}, xzr, #0x18
> > +  %tmp = alloca %struct.s42, align 4
> > +  %tmp1 = alloca %struct.s42, align 4
> > +  %0 = bitcast %struct.s42* %tmp to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast
> (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
> > +  %1 = bitcast %struct.s42* %tmp1 to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast
> (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
> > +  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1) #4
> > +
> > +declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                       i32 %i7, i32 %i8, i32 %i9, %struct.s42*
> nocapture %s1,
> > +                       %struct.s42* nocapture %s2) #2
> > +
> > +define i32 @caller42_stack() #3 {
> > +entry:
> > +; CHECK: caller42_stack
> > +; CHECK: mov fp, sp
> > +; CHECK: sub sp, sp, #96
> > +; CHECK: stur {{x[0-9]+}}, [fp, #-16]
> > +; CHECK: stur {{q[0-9]+}}, [fp, #-32]
> > +; CHECK: str {{x[0-9]+}}, [sp, #48]
> > +; CHECK: str {{q[0-9]+}}, [sp, #32]
> > +; Space for s1 is allocated at fp-32 = sp+64
> > +; Space for s2 is allocated at sp+32
> > +; CHECK: add x[[B:[0-9]+]], sp, #32
> > +; CHECK: str x[[B]], [sp, #16]
> > +; CHECK: sub x[[A:[0-9]+]], fp, #32
> > +; Address of s1 is passed on stack at sp+8
> > +; CHECK: str x[[A]], [sp, #8]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +
> > +; FAST: caller42_stack
> > +; Space for s1 is allocated at fp-24
> > +; Space for s2 is allocated at fp-48
> > +; FAST: sub x[[A:[0-9]+]], fp, #24
> > +; FAST: sub x[[B:[0-9]+]], fp, #48
> > +; Call memcpy with size = 24 (0x18)
> > +; FAST: orr {{x[0-9]+}}, xzr, #0x18
> > +; FAST: str {{w[0-9]+}}, [sp]
> > +; Address of s1 is passed on stack at sp+8
> > +; FAST: str {{x[0-9]+}}, [sp, #8]
> > +; FAST: str {{x[0-9]+}}, [sp, #16]
> > +  %tmp = alloca %struct.s42, align 4
> > +  %tmp1 = alloca %struct.s42, align 4
> > +  %0 = bitcast %struct.s42* %tmp to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast
> (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
> > +  %1 = bitcast %struct.s42* %tmp1 to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast
> (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
> > +  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
> i32 7,
> > +                       i32 8, i32 9, %struct.s42* %tmp, %struct.s42*
> %tmp1) #5
> > +  ret i32 %call
> > +}
> > +
> > +; structs with size of 22 bytes, alignment of 16
> > +; passed indirectly in x1 and x2
> > +define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43*
> nocapture %s2) #2 {
> > +entry:
> > +; CHECK: f43
> > +; CHECK: ldr w[[A:[0-9]+]], [x1]
> > +; CHECK: ldr w[[B:[0-9]+]], [x2]
> > +; CHECK: add w[[C:[0-9]+]], w[[A]], w0
> > +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
> > +; FAST: f43
> > +; FAST: ldr w[[A:[0-9]+]], [x1]
> > +; FAST: ldr w[[B:[0-9]+]], [x2]
> > +; FAST: add w[[C:[0-9]+]], w[[A]], w0
> > +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
> > +  %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
> > +  %0 = load i32* %i1, align 4, !tbaa !0
> > +  %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
> > +  %1 = load i32* %i2, align 4, !tbaa !0
> > +  %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
> > +  %2 = load i16* %s, align 2, !tbaa !3
> > +  %conv = sext i16 %2 to i32
> > +  %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
> > +  %3 = load i16* %s5, align 2, !tbaa !3
> > +  %conv6 = sext i16 %3 to i32
> > +  %add = add i32 %0, %i
> > +  %add3 = add i32 %add, %1
> > +  %add4 = add i32 %add3, %conv
> > +  %add7 = add i32 %add4, %conv6
> > +  ret i32 %add7
> > +}
> > +
> > +define i32 @caller43() #3 {
> > +entry:
> > +; CHECK: caller43
> > +; CHECK: str {{q[0-9]+}}, [sp, #48]
> > +; CHECK: str {{q[0-9]+}}, [sp, #32]
> > +; CHECK: str {{q[0-9]+}}, [sp, #16]
> > +; CHECK: str {{q[0-9]+}}, [sp]
> > +; CHECK: add x1, sp, #32
> > +; CHECK: mov x2, sp
> > +; Space for s1 is allocated at sp+32
> > +; Space for s2 is allocated at sp
> > +
> > +; FAST: caller43
> > +; FAST: mov fp, sp
> > +; Space for s1 is allocated at sp+32
> > +; Space for s2 is allocated at sp
> > +; FAST: add x1, sp, #32
> > +; FAST: mov x2, sp
> > +; FAST: str {{x[0-9]+}}, [sp, #32]
> > +; FAST: str {{x[0-9]+}}, [sp, #40]
> > +; FAST: str {{x[0-9]+}}, [sp, #48]
> > +; FAST: str {{x[0-9]+}}, [sp, #56]
> > +; FAST: str {{x[0-9]+}}, [sp]
> > +; FAST: str {{x[0-9]+}}, [sp, #8]
> > +; FAST: str {{x[0-9]+}}, [sp, #16]
> > +; FAST: str {{x[0-9]+}}, [sp, #24]
> > +  %tmp = alloca %struct.s43, align 16
> > +  %tmp1 = alloca %struct.s43, align 16
> > +  %0 = bitcast %struct.s43* %tmp to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast
> (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
> > +  %1 = bitcast %struct.s43* %tmp1 to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast
> (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
> > +  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32
> %i6,
> > +                       i32 %i7, i32 %i8, i32 %i9, %struct.s43*
> nocapture %s1,
> > +                       %struct.s43* nocapture %s2) #2
> > +
> > +define i32 @caller43_stack() #3 {
> > +entry:
> > +; CHECK: caller43_stack
> > +; CHECK: mov fp, sp
> > +; CHECK: sub sp, sp, #96
> > +; CHECK: stur {{q[0-9]+}}, [fp, #-16]
> > +; CHECK: stur {{q[0-9]+}}, [fp, #-32]
> > +; CHECK: str {{q[0-9]+}}, [sp, #48]
> > +; CHECK: str {{q[0-9]+}}, [sp, #32]
> > +; Space for s1 is allocated at fp-32 = sp+64
> > +; Space for s2 is allocated at sp+32
> > +; CHECK: add x[[B:[0-9]+]], sp, #32
> > +; CHECK: str x[[B]], [sp, #16]
> > +; CHECK: sub x[[A:[0-9]+]], fp, #32
> > +; Address of s1 is passed on stack at sp+8
> > +; CHECK: str x[[A]], [sp, #8]
> > +; CHECK: movz w[[C:[0-9]+]], #9
> > +; CHECK: str w[[C]], [sp]
> > +
> > +; FAST: caller43_stack
> > +; FAST: sub sp, sp, #96
> > +; Space for s1 is allocated at fp-32 = sp+64
> > +; Space for s2 is allocated at sp+32
> > +; FAST: sub x[[A:[0-9]+]], fp, #32
> > +; FAST: add x[[B:[0-9]+]], sp, #32
> > +; FAST: stur {{x[0-9]+}}, [fp, #-32]
> > +; FAST: stur {{x[0-9]+}}, [fp, #-24]
> > +; FAST: stur {{x[0-9]+}}, [fp, #-16]
> > +; FAST: stur {{x[0-9]+}}, [fp, #-8]
> > +; FAST: str {{x[0-9]+}}, [sp, #32]
> > +; FAST: str {{x[0-9]+}}, [sp, #40]
> > +; FAST: str {{x[0-9]+}}, [sp, #48]
> > +; FAST: str {{x[0-9]+}}, [sp, #56]
> > +; FAST: str {{w[0-9]+}}, [sp]
> > +; Address of s1 is passed on stack at sp+8
> > +; FAST: str {{x[0-9]+}}, [sp, #8]
> > +; FAST: str {{x[0-9]+}}, [sp, #16]
> > +  %tmp = alloca %struct.s43, align 16
> > +  %tmp1 = alloca %struct.s43, align 16
> > +  %0 = bitcast %struct.s43* %tmp to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast
> (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
> > +  %1 = bitcast %struct.s43* %tmp1 to i8*
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast
> (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
> > +  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
> i32 7,
> > +                       i32 8, i32 9, %struct.s43* %tmp, %struct.s43*
> %tmp1) #5
> > +  ret i32 %call
> > +}
> > +
> > +; rdar://13668927
> > +; Check that we don't split an i128.
> > +declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32
> %i5,
> > +                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)
> > +
> > +define i32 @i128_split() {
> > +entry:
> > +; CHECK: i128_split
> > +; "i128 %0" should be on stack at [sp].
> > +; "i32 8" should be on stack at [sp, #16].
> > +; CHECK: str {{w[0-9]+}}, [sp, #16]
> > +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
> > +; FAST: i128_split
> > +; FAST: mov x[[ADDR:[0-9]+]], sp
> > +; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
> > +; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]]
> > +  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
> > +  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4,
> i32 5,
> > +                                           i32 6, i32 7, i128 %0, i32
> 8) #5
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
> > +                               i32 %i6, i32 %i7, i64 %s1, i32 %i8)
> > +
> > +define i32 @i64_split() {
> > +entry:
> > +; CHECK: i64_split
> > +; "i64 %0" should be in register x7.
> > +; "i32 8" should be on stack at [sp].
> > +; CHECK: ldr x7, [{{x[0-9]+}}]
> > +; CHECK: str {{w[0-9]+}}, [sp]
> > +; FAST: i64_split
> > +; FAST: ldr x7, [{{x[0-9]+}}]
> > +; FAST: str {{w[0-9]+}}, [sp]
> > +  %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
> > +  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
> > +                                    i32 6, i32 7, i64 %0, i32 8) #5
> > +  ret i32 %call
> > +}
> > +
> > +attributes #0 = { noinline nounwind readnone
> "fp-contract-model"="standard" "relocation-model"="pic"
> "ssp-buffers-size"="8" }
> > +attributes #1 = { nounwind readonly "fp-contract-model"="standard"
> "relocation-model"="pic" "ssp-buffers-size"="8" }
> > +attributes #2 = { noinline nounwind readonly
> "fp-contract-model"="standard" "relocation-model"="pic"
> "ssp-buffers-size"="8" }
> > +attributes #3 = { nounwind "fp-contract-model"="standard"
> "relocation-model"="pic" "ssp-buffers-size"="8" }
> > +attributes #4 = { nounwind }
> > +attributes #5 = { nobuiltin }
> > +
> > +!0 = metadata !{metadata !"int", metadata !1}
> > +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> > +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> > +!3 = metadata !{metadata !"short", metadata !1}
> > +!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3,
> i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4,
> metadata !0, i64 20, i64 2, metadata !3}
> >
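
abi_align.ll walks through the three size classes for by-value structs on
arm64: up to 8 bytes they are coerced to a single i64, up to 16 bytes to two
i64s (or an i128 when 16-byte aligned), and anything larger is copied by the
caller and passed by address. A small C sketch of the three classes, with
struct layouts modelled loosely on the s38/s40/s42 shapes in the test rather
than copied from the original source:

  #include <stdio.h>

  struct s38 { int i; short s; };                                     /* < 8 bytes  */
  struct s40 { int i; short s; int i2; short s2; };                   /* < 16 bytes */
  struct s42 { int i; short s; int i2; short s2; int i3; short s3; }; /* > 16 bytes */

  /* s38 arrives as one i64, s40 as two i64s, s42 indirectly via a pointer
     to a caller-allocated copy. */
  static int f38(int i, struct s38 a, struct s38 b) { return i + a.i + b.i; }
  static int f40(int i, struct s40 a, struct s40 b) { return i + a.i + b.i; }
  static int f42(int i, struct s42 a, struct s42 b) { return i + a.i + b.i; }

  int main(void) {
    struct s38 a38 = {1, 2}, b38 = {3, 4};
    struct s40 a40 = {1, 2, 3, 4}, b40 = {5, 6, 7, 8};
    struct s42 a42 = {1, 2, 3, 4, 5, 6}, b42 = {7, 8, 9, 10, 11, 12};
    printf("%d %d %d\n", f38(3, a38, b38), f40(3, a40, b40), f42(3, a42, b42));
    return 0;
  }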
> > Added: llvm/trunk/test/CodeGen/ARM64/addp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/addp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/addp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,32 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define double @foo(<2 x double> %a) nounwind {
> > +; CHECK-LABEL: foo:
> > +; CHECK: faddp.2d d0, v0
> > +; CHECK-NEXT: ret
> > +  %lane0.i = extractelement <2 x double> %a, i32 0
> > +  %lane1.i = extractelement <2 x double> %a, i32 1
> > +  %vpaddd.i = fadd double %lane0.i, %lane1.i
> > +  ret double %vpaddd.i
> > +}
> > +
> > +define i64 @foo0(<2 x i64> %a) nounwind {
> > +; CHECK-LABEL: foo0:
> > +; CHECK: addp.2d d0, v0
> > +; CHECK-NEXT: fmov x0, d0
> > +; CHECK-NEXT: ret
> > +  %lane0.i = extractelement <2 x i64> %a, i32 0
> > +  %lane1.i = extractelement <2 x i64> %a, i32 1
> > +  %vpaddd.i = add i64 %lane0.i, %lane1.i
> > +  ret i64 %vpaddd.i
> > +}
> > +
> > +define float @foo1(<2 x float> %a) nounwind {
> > +; CHECK-LABEL: foo1:
> > +; CHECK: faddp.2s
> > +; CHECK-NEXT: ret
> > +  %lane0.i = extractelement <2 x float> %a, i32 0
> > +  %lane1.i = extractelement <2 x float> %a, i32 1
> > +  %vpaddd.i = fadd float %lane0.i, %lane1.i
> > +  ret float %vpaddd.i
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,171 @@
> > +; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
> > +; <rdar://problem/13621857>
> > +
> > + at block = common global i8* null, align 8
> > +
> > +define i32 @fct(i32 %i1, i32 %i2) {
> > +; CHECK: @fct
> > +; The sign extension is used more than once, thus it should not be folded.
> > +; However, CodeGenPrepare does not share the sext across uses, so it ends
> > +; up being folded here anyway.
> > +; _CHECK-NOT_: , sxtw]
> > +entry:
> > +  %idxprom = sext i32 %i1 to i64
> > +  %0 = load i8** @block, align 8
> > +  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
> > +  %1 = load i8* %arrayidx, align 1
> > +  %idxprom1 = sext i32 %i2 to i64
> > +  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
> > +  %2 = load i8* %arrayidx2, align 1
> > +  %cmp = icmp eq i8 %1, %2
> > +  br i1 %cmp, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %cmp7 = icmp ugt i8 %1, %2
> > +  %conv8 = zext i1 %cmp7 to i32
> > +  br label %return
> > +
> > +if.end:                                           ; preds = %entry
> > +  %inc = add nsw i32 %i1, 1
> > +  %inc9 = add nsw i32 %i2, 1
> > +  %idxprom10 = sext i32 %inc to i64
> > +  %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
> > +  %3 = load i8* %arrayidx11, align 1
> > +  %idxprom12 = sext i32 %inc9 to i64
> > +  %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
> > +  %4 = load i8* %arrayidx13, align 1
> > +  %cmp16 = icmp eq i8 %3, %4
> > +  br i1 %cmp16, label %if.end23, label %if.then18
> > +
> > +if.then18:                                        ; preds = %if.end
> > +  %cmp21 = icmp ugt i8 %3, %4
> > +  %conv22 = zext i1 %cmp21 to i32
> > +  br label %return
> > +
> > +if.end23:                                         ; preds = %if.end
> > +  %inc24 = add nsw i32 %i1, 2
> > +  %inc25 = add nsw i32 %i2, 2
> > +  %idxprom26 = sext i32 %inc24 to i64
> > +  %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
> > +  %5 = load i8* %arrayidx27, align 1
> > +  %idxprom28 = sext i32 %inc25 to i64
> > +  %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
> > +  %6 = load i8* %arrayidx29, align 1
> > +  %cmp32 = icmp eq i8 %5, %6
> > +  br i1 %cmp32, label %return, label %if.then34
> > +
> > +if.then34:                                        ; preds = %if.end23
> > +  %cmp37 = icmp ugt i8 %5, %6
> > +  %conv38 = zext i1 %cmp37 to i32
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.end23,
> %if.then34, %if.then18, %if.then
> > +  %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [
> %conv38, %if.then34 ], [ 1, %if.end23 ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @fct1(i32 %i1, i32 %i2) optsize {
> > +; CHECK: @fct1
> > +; Addressing modes are folded when optimizing for code size.
> > +; CHECK: , sxtw]
> > +; CHECK: , sxtw]
> > +entry:
> > +  %idxprom = sext i32 %i1 to i64
> > +  %0 = load i8** @block, align 8
> > +  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
> > +  %1 = load i8* %arrayidx, align 1
> > +  %idxprom1 = sext i32 %i2 to i64
> > +  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
> > +  %2 = load i8* %arrayidx2, align 1
> > +  %cmp = icmp eq i8 %1, %2
> > +  br i1 %cmp, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %cmp7 = icmp ugt i8 %1, %2
> > +  %conv8 = zext i1 %cmp7 to i32
> > +  br label %return
> > +
> > +if.end:                                           ; preds = %entry
> > +  %inc = add nsw i32 %i1, 1
> > +  %inc9 = add nsw i32 %i2, 1
> > +  %idxprom10 = sext i32 %inc to i64
> > +  %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
> > +  %3 = load i8* %arrayidx11, align 1
> > +  %idxprom12 = sext i32 %inc9 to i64
> > +  %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
> > +  %4 = load i8* %arrayidx13, align 1
> > +  %cmp16 = icmp eq i8 %3, %4
> > +  br i1 %cmp16, label %if.end23, label %if.then18
> > +
> > +if.then18:                                        ; preds = %if.end
> > +  %cmp21 = icmp ugt i8 %3, %4
> > +  %conv22 = zext i1 %cmp21 to i32
> > +  br label %return
> > +
> > +if.end23:                                         ; preds = %if.end
> > +  %inc24 = add nsw i32 %i1, 2
> > +  %inc25 = add nsw i32 %i2, 2
> > +  %idxprom26 = sext i32 %inc24 to i64
> > +  %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
> > +  %5 = load i8* %arrayidx27, align 1
> > +  %idxprom28 = sext i32 %inc25 to i64
> > +  %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
> > +  %6 = load i8* %arrayidx29, align 1
> > +  %cmp32 = icmp eq i8 %5, %6
> > +  br i1 %cmp32, label %return, label %if.then34
> > +
> > +if.then34:                                        ; preds = %if.end23
> > +  %cmp37 = icmp ugt i8 %5, %6
> > +  %conv38 = zext i1 %cmp37 to i32
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.end23,
> %if.then34, %if.then18, %if.then
> > +  %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [
> %conv38, %if.then34 ], [ 1, %if.end23 ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +; CHECK: @test
> > +; CHECK-NOT: , uxtw #2]
> > +define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) {
> > +entry:
> > +  %conv = zext i8 %c to i32
> > +  %add = sub i32 0, %arg
> > +  %tobool = icmp eq i32 %conv, %add
> > +  br i1 %tobool, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %idxprom = zext i8 %c to i64
> > +  %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
> > +  %0 = load volatile i32* %arrayidx, align 4
> > +  %1 = load volatile i32* %arrayidx, align 4
> > +  %add3 = add nsw i32 %1, %0
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %entry,
> %if.then
> > +  %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
> > +  ret i32 %res.0
> > +}
> > +
> > +
> > +; CHECK: @test2
> > +; CHECK: , uxtw #2]
> > +; CHECK: , uxtw #2]
> > +define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize {
> > +entry:
> > +  %conv = zext i8 %c to i32
> > +  %add = sub i32 0, %arg
> > +  %tobool = icmp eq i32 %conv, %add
> > +  br i1 %tobool, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %idxprom = zext i8 %c to i64
> > +  %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
> > +  %0 = load volatile i32* %arrayidx, align 4
> > +  %1 = load volatile i32* %arrayidx, align 4
> > +  %add3 = add nsw i32 %1, %0
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %entry,
> %if.then
> > +  %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
> > +  ret i32 %res.0
> > +}
> >
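A reader's note on @fct1/@test/@test2 above: the contrast being tested is that the sign/zero extension feeding the address is only duplicated and folded into the loads' addressing modes when the function is optsize. A stripped-down form of the folding case, written by me for illustration rather than taken from the patch (names are mine), is:

define i32 @extend_fold_sketch(i32* %array, i8 zeroext %c) optsize {
entry:
  %idx = zext i8 %c to i64
  %addr = getelementptr inbounds i32* %array, i64 %idx
  ; Under optsize the zext should fold into each load, giving something like
  ;   ldr wN, [x0, wIdx, uxtw #2]
  ; whereas without optsize the extension stays a separate instruction and the
  ; CHECK-NOT above for @test expects no ", uxtw #2]" operand at all.
  %a = load volatile i32* %addr, align 4
  %b = load volatile i32* %addr, align 4
  %sum = add nsw i32 %a, %b
  ret i32 %sum
}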
> > Added: llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,82 @@
> > +; RUN: llc -march arm64 < %s | FileCheck %s
> > +; rdar://13452552
> > +; ModuleID = 'reduced_test.ll'
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
> > +target triple = "arm64-apple-ios3.0.0"
> > +
> > + at block = common global i8* null, align 8
> > +
> > +define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
> > +; CHECK: fullGtU
> > +; CHECK: adrp [[PAGE:x[0-9]+]], _block at GOTPAGE
> > +; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block at GOTPAGEOFF]
> > +; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
> > +; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]],  x0,
> sxtw]
> > +; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], x1, sxtw]
> > +; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
> > +; CHECK-NEXT: b.ne
> > +; Next BB
> > +; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
> > +; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
> > +; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
> > +; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
> > +; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
> > +; CHECK-NEXT: b.ne
> > +; Next BB
> > +; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
> > +; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
> > +; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
> > +entry:
> > +  %idxprom = sext i32 %i1 to i64
> > +  %tmp = load i8** @block, align 8
> > +  %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom
> > +  %tmp1 = load i8* %arrayidx, align 1
> > +  %idxprom1 = sext i32 %i2 to i64
> > +  %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1
> > +  %tmp2 = load i8* %arrayidx2, align 1
> > +  %cmp = icmp eq i8 %tmp1, %tmp2
> > +  br i1 %cmp, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %cmp7 = icmp ugt i8 %tmp1, %tmp2
> > +  %conv9 = zext i1 %cmp7 to i8
> > +  br label %return
> > +
> > +if.end:                                           ; preds = %entry
> > +  %inc = add nsw i32 %i1, 1
> > +  %inc10 = add nsw i32 %i2, 1
> > +  %idxprom11 = sext i32 %inc to i64
> > +  %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11
> > +  %tmp3 = load i8* %arrayidx12, align 1
> > +  %idxprom13 = sext i32 %inc10 to i64
> > +  %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13
> > +  %tmp4 = load i8* %arrayidx14, align 1
> > +  %cmp17 = icmp eq i8 %tmp3, %tmp4
> > +  br i1 %cmp17, label %if.end25, label %if.then19
> > +
> > +if.then19:                                        ; preds = %if.end
> > +  %cmp22 = icmp ugt i8 %tmp3, %tmp4
> > +  %conv24 = zext i1 %cmp22 to i8
> > +  br label %return
> > +
> > +if.end25:                                         ; preds = %if.end
> > +  %inc26 = add nsw i32 %i1, 2
> > +  %inc27 = add nsw i32 %i2, 2
> > +  %idxprom28 = sext i32 %inc26 to i64
> > +  %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28
> > +  %tmp5 = load i8* %arrayidx29, align 1
> > +  %idxprom30 = sext i32 %inc27 to i64
> > +  %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30
> > +  %tmp6 = load i8* %arrayidx31, align 1
> > +  %cmp34 = icmp eq i8 %tmp5, %tmp6
> > +  br i1 %cmp34, label %return, label %if.then36
> > +
> > +if.then36:                                        ; preds = %if.end25
> > +  %cmp39 = icmp ugt i8 %tmp5, %tmp6
> > +  %conv41 = zext i1 %cmp39 to i8
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.then36,
> %if.end25, %if.then19, %if.then
> > +  %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [
> %conv41, %if.then36 ], [ 0, %if.end25 ]
> > +  ret i8 %retval.0
> > +}
> >
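Reading the CHECK lines for @fullGtU above: only the first pair of byte loads pays for the sign-extensions (folded as sxtw register-offset operands), after which the promoted, already-extended bases are reused and the +1/+2 accesses become plain immediate-offset ldrb's. A hand-written equivalent of one such later access (my sketch, not from the patch) looks like:

define i8 @promoted_access_sketch(i8* %base, i32 %i1) {
entry:
  %idx = sext i32 %i1 to i64
  ; The extension is paid once when forming the base...
  %p0 = getelementptr inbounds i8* %base, i64 %idx
  ; ...and the element-plus-one access then needs only an immediate offset,
  ; i.e. ldrb wN, [xBase1, #1] once xBase1 has been formed with a w-reg sxtw add.
  %p1 = getelementptr inbounds i8* %p0, i64 1
  %v1 = load i8* %p1, align 1
  ret i8 %v1
}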
> > Added: llvm/trunk/test/CodeGen/ARM64/addrmode.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addrmode.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/addrmode.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/addrmode.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,72 @@
> > +; RUN: llc -march=arm64 < %s | FileCheck %s
> > +; rdar://10232252
> > +
> > + at object = external hidden global i64, section "__DATA, __objc_ivar",
> align 8
> > +
> > +; base + offset (imm9)
> > +; CHECK: @t1
> > +; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
> > +; CHECK: ret
> > +define void @t1() {
> > +  %incdec.ptr = getelementptr inbounds i64* @object, i64 1
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; base + offset (> imm9)
> > +; CHECK: @t2
> > +; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
> > +; CHECK: ldr xzr, [
> > +; CHECK: [[ADDREG]]]
> > +; CHECK: ret
> > +define void @t2() {
> > +  %incdec.ptr = getelementptr inbounds i64* @object, i64 -33
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
> > +; CHECK: @t3
> > +; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
> > +; CHECK: ret
> > +define void @t3() {
> > +  %incdec.ptr = getelementptr inbounds i64* @object, i64 4095
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; base + unsigned offset (> imm12 * size of type in bytes)
> > +; CHECK: @t4
> > +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #32768
> > +; CHECK: ldr xzr, [
> > +; CHECK: [[ADDREG]]]
> > +; CHECK: ret
> > +define void @t4() {
> > +  %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; base + reg
> > +; CHECK: @t5
> > +; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
> > +; CHECK: ret
> > +define void @t5(i64 %a) {
> > +  %incdec.ptr = getelementptr inbounds i64* @object, i64 %a
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; base + reg + imm
> > +; CHECK: @t6
> > +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
> > +; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #32768
> > +; CHECK: ldr xzr, [
> > +; CHECK: [[ADDREG]]]
> > +; CHECK: ret
> > +define void @t6(i64 %a) {
> > +  %tmp1 = getelementptr inbounds i64* @object, i64 %a
> > +  %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096
> > +  %tmp = load volatile i64* %incdec.ptr, align 8
> > +  ret void
> > +}
> >
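One case addrmode.ll above does not cover is a negative byte offset that still fits the signed 9-bit unscaled range; judging from the ldurb/ldurh checks in the atomic tests later in this patch, I would expect that to select a single ldur rather than an explicit sub. A hypothetical extra test in the same style as @t1-@t6 (mine, not part of the patch, reusing the @object global from that file):

; base + negative offset within the signed 9-bit unscaled range (-256..255)
; expected: ldur xzr, [x{{[0-9]+}}, #-256]
define void @t7() {
  %incdec.ptr = getelementptr inbounds i64* @object, i64 -32   ; byte offset -256
  %tmp = load volatile i64* %incdec.ptr, align 8
  ret void
}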
> > Added: llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false |
> FileCheck %s
> > +
> > +; rdar://12713765
> > +; Make sure we are not creating stack objects that are assumed to be
> 64-byte
> > +; aligned.
> > + at T3_retval = common global <16 x float> zeroinitializer, align 16
> > +
> > +define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp {
> > +entry:
> > +; CHECK: test
> > +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp, #32]
> > +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp]
> > +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]],
> #32]
> > +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]]
> > + %retval = alloca <16 x float>, align 16
> > + %0 = load <16 x float>* @T3_retval, align 16
> > + store <16 x float> %0, <16 x float>* %retval
> > + %1 = load <16 x float>* %retval
> > + store <16 x float> %1, <16 x float>* %agg.result, align 16
> > + ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,29 @@
> > +; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
> > +
> > +; CHECK: foo
> > +; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
> > +; CHECK: str w[[REG]], [x19, #132]
> > +; CHECK: ldr w{{[0-9]+}}, [x19, #264]
> > +
> > +define i32 @foo(i32 %a) nounwind {
> > +  %retval = alloca i32, align 4
> > +  %a.addr = alloca i32, align 4
> > +  %arr = alloca [32 x i32], align 4
> > +  %i = alloca i32, align 4
> > +  %arr2 = alloca [32 x i32], align 4
> > +  %j = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr, align 4
> > +  %tmp = load i32* %a.addr, align 4
> > +  %tmp1 = zext i32 %tmp to i64
> > +  %v = mul i64 4, %tmp1
> > +  %vla = alloca i8, i64 %v, align 4
> > +  %tmp2 = bitcast i8* %vla to i32*
> > +  %tmp3 = load i32* %a.addr, align 4
> > +  store i32 %tmp3, i32* %i, align 4
> > +  %tmp4 = load i32* %a.addr, align 4
> > +  store i32 %tmp4, i32* %j, align 4
> > +  %tmp5 = load i32* %j, align 4
> > +  store i32 %tmp5, i32* %retval
> > +  %x = load i32* %retval
> > +  ret i32 %x
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,72 @@
> > +; RUN: llc -O1 -march=arm64 -enable-andcmp-sinking=true < %s |
> FileCheck %s
> > +; ModuleID = 'and-cbz-extr-mr.bc'
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
> > +target triple = "arm64-apple-ios7.0.0"
> > +
> > +define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8*
> %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8,
> i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8*
> %str14) unnamed_addr #0 align 2 {
> > +; CHECK: _foo:
> > +entry:
> > +  %tobool = icmp eq i8* %str14, null
> > +  br i1 %tobool, label %return, label %if.end
> > +
> > +; CHECK: %if.end
> > +; CHECK: tbz
> > +if.end:                                           ; preds = %entry
> > +  %and.i.i.i = and i32 %int1, 4
> > +  %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0
> > +  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i
> > +
> > +land.rhs.i:                                       ; preds = %if.end
> > +  %cmp.i.i.i = icmp eq i8* %str12, %str13
> > +  br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i
> > +
> > +lor.rhs.i.i.i:                                    ; preds = %land.rhs.i
> > +  %cmp.i13.i.i.i = icmp eq i8* %str10, %str11
> > +  br i1 %cmp.i13.i.i.i, label
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5
> > +
> > +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds =
> %lor.rhs.i.i.i
> > +  %cmp.i.i.i.i = icmp eq i8* %str8, %str9
> > +  br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5
> > +
> > +if.then3:                                         ; preds =
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i
> > +  %tmp11 = load i8* %str14, align 8
> > +  %tmp12 = and i8 %tmp11, 2
> > +  %tmp13 = icmp ne i8 %tmp12, 0
> > +  br label %return
> > +
> > +if.end5:                                          ; preds =
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i
> > +; CHECK: %if.end5
> > +; CHECK: tbz
> > +  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19
> > +
> > +land.rhs.i19:                                     ; preds = %if.end5
> > +  %cmp.i.i.i18 = icmp eq i8* %str6, %str7
> > +  br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23
> > +
> > +lor.rhs.i.i.i23:                                  ; preds =
> %land.rhs.i19
> > +  %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4
> > +  br i1 %cmp.i13.i.i.i22, label
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12
> > +
> > +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds =
> %lor.rhs.i.i.i23
> > +  %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2
> > +  br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12
> > +
> > +if.then7:                                         ; preds =
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19
> > +  br i1 %isTextField, label %if.then9, label %if.end12
> > +
> > +if.then9:                                         ; preds = %if.then7
> > +  %tmp23 = load i8* %str5, align 8
> > +  %tmp24 = and i8 %tmp23, 2
> > +  %tmp25 = icmp ne i8 %tmp24, 0
> > +  br label %return
> > +
> > +if.end12:                                         ; preds = %if.then7,
> %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28,
> %lor.rhs.i.i.i23, %if.end5, %if.end
> > +  %lnot = xor i1 %IsEditable, true
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.end12,
> %if.then9, %if.then3, %entry
> > +  %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [
> %lnot, %if.end12 ], [ true, %entry ]
> > +  ret i1 %retval.0
> > +}
> > +
> > +attributes #0 = { nounwind ssp }
> >
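The pattern this test cares about is the standard single-bit test: an and with a power of two, a compare against zero, and a conditional branch, which -enable-andcmp-sinking lets the backend collapse into one tbz/tbnz. Stripped of the WebCore noise, the shape being matched is just (my reduction, not part of the patch):

define void @tbz_sketch(i32 %int1) {
entry:
  %bit = and i32 %int1, 4                  ; isolate bit 2
  %clear = icmp eq i32 %bit, 0
  ; Expected to be selected as: tbz wN, #2, <bb.clear>
  br i1 %clear, label %bb.clear, label %bb.set
bb.clear:
  ret void
bb.set:
  ret void
}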
> > Added: llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,19 @@
> > +; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s
> > +;
> > +; Check that misuse of anyregcc results in a compile time error.
> > +
> > +; CHECK: LLVM ERROR: ran out of registers during register allocation
> > +define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5,
> i64 %v6, i64 %v7, i64 %v8,
> > +                        i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64
> %v13, i64 %v14, i64 %v15, i64 %v16,
> > +                        i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64
> %v21, i64 %v22, i64 %v23, i64 %v24,
> > +                        i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64
> %v29, i64 %v30, i64 %v31, i64 %v32) {
> > +entry:
> > +  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to
> i8*), i32 32,
> > +                i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
> i64 %v7, i64 %v8,
> > +                i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64
> %v14, i64 %v15, i64 %v16,
> > +                i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64
> %v22, i64 %v23, i64 %v24,
> > +                i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64
> %v30, i64 %v31, i64 %v32)
> > +  ret i64 %result
> > +}
> > +
> > +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
> >
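For contrast with the failure case above, the same intrinsic is fine when only a few values are live across it. A minimal well-formed anyregcc patchpoint (my sketch, reusing the declaration just above; the anyregcc.ll tests that follow exercise this far more thoroughly) would be:

define i64 @anyreg_ok(i64 %v1, i64 %v2) {
entry:
  ; id 0, 16-byte shadow, no real callee, two live i64 values to record
  %r = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 0, i32 16, i8* null, i32 2, i64 %v1, i64 %v2)
  ret i64 %r
}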
> > Added: llvm/trunk/test/CodeGen/ARM64/anyregcc.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/anyregcc.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/anyregcc.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/anyregcc.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,358 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +
> > +; Stackmap Header: no constants - 8 callsites
> > +; CHECK-LABEL: .section        __LLVM_STACKMAPS,__llvm_stackmaps
> > +; CHECK-NEXT:  __LLVM_StackMaps:
> > +; Header
> > +; CHECK-NEXT:   .long   0
> > +; Num Functions
> > +; CHECK-NEXT:   .long 8
> > +; CHECK-NEXT:   .long _test
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _property_access1
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _property_access2
> > +; CHECK-NEXT:   .long 32
> > +; CHECK-NEXT:   .long _property_access3
> > +; CHECK-NEXT:   .long 32
> > +; CHECK-NEXT:   .long _anyreg_test1
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _anyreg_test2
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _patchpoint_spilldef
> > +; CHECK-NEXT:   .long 112
> > +; CHECK-NEXT:   .long _patchpoint_spillargs
> > +; CHECK-NEXT:   .long 128
> > +; Num Constants
> > +; CHECK-NEXT:   .long   0
> > +; Num Callsites
> > +; CHECK-NEXT:   .long   8
> > +
> > +; test
> > +; CHECK-LABEL:  .long   L{{.*}}-_test
> > +; CHECK-NEXT:   .short  0
> > +; 3 locations
> > +; CHECK-NEXT:   .short  3
> > +; Loc 0: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 4
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 4
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 2: Constant 3
> > +; CHECK-NEXT:   .byte 4
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long 3
> > +define i64 @test() nounwind ssp uwtable {
> > +entry:
> > +  call anyregcc void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1,
> i32 2, i64 3)
> > +  ret i64 0
> > +}
> > +
> > +; property access 1 - %obj is an anyreg call argument and should
> therefore be in a register
> > +; CHECK-LABEL:  .long   L{{.*}}-_property_access1
> > +; CHECK-NEXT:   .short  0
> > +; 2 locations
> > +; CHECK-NEXT:   .short  2
> > +; Loc 0: Register <-- this is the return register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
> > +entry:
> > +  %f = inttoptr i64 281474417671919 to i8*
> > +  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
> > +  ret i64 %ret
> > +}
> > +
> > +; property access 2 - %obj is an anyreg call argument and should
> therefore be in a register
> > +; CHECK-LABEL:  .long   L{{.*}}-_property_access2
> > +; CHECK-NEXT:   .short  0
> > +; 2 locations
> > +; CHECK-NEXT:   .short  2
> > +; Loc 0: Register <-- this is the return register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +define i64 @property_access2() nounwind ssp uwtable {
> > +entry:
> > +  %obj = alloca i64, align 8
> > +  %f = inttoptr i64 281474417671919 to i8*
> > +  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
> > +  ret i64 %ret
> > +}
> > +
> > +; property access 3 - %obj is a frame index
> > +; CHECK-LABEL:  .long   L{{.*}}-_property_access3
> > +; CHECK-NEXT:   .short  0
> > +; 2 locations
> > +; CHECK-NEXT:   .short  2
> > +; Loc 0: Register <-- this is the return register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Direct FP - 8
> > +; CHECK-NEXT:   .byte 2
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short 29
> > +; CHECK-NEXT:   .long -8
> > +define i64 @property_access3() nounwind ssp uwtable {
> > +entry:
> > +  %obj = alloca i64, align 8
> > +  %f = inttoptr i64 281474417671919 to i8*
> > +  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
> > +  ret i64 %ret
> > +}
> > +
> > +; anyreg_test1
> > +; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test1
> > +; CHECK-NEXT:   .short  0
> > +; 14 locations
> > +; CHECK-NEXT:   .short  14
> > +; Loc 0: Register <-- this is the return register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 2: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 3: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 4: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 5: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 6: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 7: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 8: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 9: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 10: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 11: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 12: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 13: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5,
> i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
> nounwind ssp uwtable {
> > +entry:
> > +  %f = inttoptr i64 281474417671919 to i8*
> > +  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1,
> i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8*
> %a10, i8* %a11, i8* %a12, i8* %a13)
> > +  ret i64 %ret
> > +}
> > +
> > +; anyreg_test2
> > +; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test2
> > +; CHECK-NEXT:   .short  0
> > +; 14 locations
> > +; CHECK-NEXT:   .short  14
> > +; Loc 0: Register <-- this is the return register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 1: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 2: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 3: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 4: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 5: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 6: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 7: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 8: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 9: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 10: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 11: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 12: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +; Loc 13: Register
> > +; CHECK-NEXT:   .byte 1
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short {{[0-9]+}}
> > +; CHECK-NEXT:   .long 0
> > +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5,
> i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
> nounwind ssp uwtable {
> > +entry:
> > +  %f = inttoptr i64 281474417671919 to i8*
> > +  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1,
> i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8*
> %a10, i8* %a11, i8* %a12, i8* %a13)
> > +  ret i64 %ret
> > +}
> > +
> > +; Test spilling the return value of an anyregcc call.
> > +;
> > +; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a
> non-store!"
> > +;
> > +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef
> > +; CHECK-NEXT: .short 0
> > +; CHECK-NEXT: .short 3
> > +; Loc 0: Register (some register that will be spilled to the stack)
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +; Loc 1: Register
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +; Loc 2: Register
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
> > +entry:
> > +  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to
> i8*), i32 2, i64 %p1, i64 %p2)
> > +  tail call void asm sideeffect "nop",
> "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
> nounwind
> > +  ret i64 %result
> > +}
> > +
> > +; Test spilling the arguments of an anyregcc call.
> > +;
> > +; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
> > +;
> > +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs
> > +; CHECK-NEXT: .short 0
> > +; CHECK-NEXT: .short 5
> > +; Loc 0: Return value in a register
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +; Loc 1: Arg0 in a Register
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +; Loc 2: Arg1 in a Register
> > +; CHECK-NEXT: .byte  1
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short {{[0-9]+}}
> > +; CHECK-NEXT: .long  0
> > +; Loc 3: Arg2 spilled to FP -96
> > +; CHECK-NEXT: .byte  3
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short 29
> > +; CHECK-NEXT: .long -96
> > +; Loc 4: Arg3 spilled to FP - 88
> > +; CHECK-NEXT: .byte  3
> > +; CHECK-NEXT: .byte  8
> > +; CHECK-NEXT: .short 29
> > +; CHECK-NEXT: .long -88
> > +define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
> > +entry:
> > +  tail call void asm sideeffect "nop",
> "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
> nounwind
> > +  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to
> i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
> > +  ret i64 %result
> > +}
> > +
> > +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
> > +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,153 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize
> ssp {
> > +; CHECK-LABEL: qadds:
> > +; CHECK: sqadd s0, s0, s1
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vecext1 = extractelement <4 x i32> %c, i32 0
> > +  %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32
> %vecext1) nounwind
> > +  ret i32 %vqadd.i
> > +}
> > +
> > +define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize
> ssp {
> > +; CHECK-LABEL: qaddd:
> > +; CHECK: sqadd d0, d0, d1
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vecext1 = extractelement <2 x i64> %c, i32 0
> > +  %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64
> %vecext1) nounwind
> > +  ret i64 %vqadd.i
> > +}
> > +
> > +define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone
> optsize ssp {
> > +; CHECK-LABEL: uqadds:
> > +; CHECK: uqadd s0, s0, s1
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vecext1 = extractelement <4 x i32> %c, i32 0
> > +  %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32
> %vecext1) nounwind
> > +  ret i32 %vqadd.i
> > +}
> > +
> > +define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone
> optsize ssp {
> > +; CHECK-LABEL: uqaddd:
> > +; CHECK: uqadd d0, d0, d1
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vecext1 = extractelement <2 x i64> %c, i32 0
> > +  %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64
> %vecext1) nounwind
> > +  ret i64 %vqadd.i
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone
> > +declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone
> > +
> > +define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize
> ssp {
> > +; CHECK-LABEL: qsubs:
> > +; CHECK: sqsub s0, s0, s1
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vecext1 = extractelement <4 x i32> %c, i32 0
> > +  %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32
> %vecext1) nounwind
> > +  ret i32 %vqsub.i
> > +}
> > +
> > +define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize
> ssp {
> > +; CHECK-LABEL: qsubd:
> > +; CHECK: sqsub d0, d0, d1
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vecext1 = extractelement <2 x i64> %c, i32 0
> > +  %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64
> %vecext1) nounwind
> > +  ret i64 %vqsub.i
> > +}
> > +
> > +define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone
> optsize ssp {
> > +; CHECK-LABEL: uqsubs:
> > +; CHECK: uqsub s0, s0, s1
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vecext1 = extractelement <4 x i32> %c, i32 0
> > +  %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32
> %vecext1) nounwind
> > +  ret i32 %vqsub.i
> > +}
> > +
> > +define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone
> optsize ssp {
> > +; CHECK-LABEL: uqsubd:
> > +; CHECK: uqsub d0, d0, d1
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vecext1 = extractelement <2 x i64> %c, i32 0
> > +  %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64
> %vecext1) nounwind
> > +  ret i64 %vqsub.i
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone
> > +declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone
> > +
> > +define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
> > +; CHECK-LABEL: qabss:
> > +; CHECK: sqabs s0, s0
> > +; CHECK: ret
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext)
> nounwind
> > +  ret i32 %vqabs.i
> > +}
> > +
> > +define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
> > +; CHECK-LABEL: qabsd:
> > +; CHECK: sqabs d0, d0
> > +; CHECK: ret
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext)
> nounwind
> > +  ret i64 %vqabs.i
> > +}
> > +
> > +define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
> > +; CHECK-LABEL: qnegs:
> > +; CHECK: sqneg s0, s0
> > +; CHECK: ret
> > +  %vecext = extractelement <4 x i32> %b, i32 0
> > +  %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext)
> nounwind
> > +  ret i32 %vqneg.i
> > +}
> > +
> > +define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
> > +; CHECK-LABEL: qnegd:
> > +; CHECK: sqneg d0, d0
> > +; CHECK: ret
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext)
> nounwind
> > +  ret i64 %vqneg.i
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone
> > +declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone
> > +
> > +
> > +define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
> > +; CHECK-LABEL: vqmovund:
> > +; CHECK: sqxtun s0, d0
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64
> %vecext) nounwind
> > +  ret i32 %vqmovun.i
> > +}
> > +
> > +define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
> > +; CHECK-LABEL: vqmovnd_s:
> > +; CHECK: sqxtn s0, d0
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64
> %vecext) nounwind
> > +  ret i32 %vqmovn.i
> > +}
> > +
> > +define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
> > +; CHECK-LABEL: vqmovnd_u:
> > +; CHECK: uqxtn s0, d0
> > +  %vecext = extractelement <2 x i64> %b, i32 0
> > +  %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64
> %vecext) nounwind
> > +  ret i32 %vqmovn.i
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
> > +declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind
> readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/arith.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/arith.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/arith.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/arith.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,262 @@
> > +; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s
> > +
> > +define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: add w0, w1, w0
> > +; CHECK: ret
> > +  %add = add i32 %b, %a
> > +  ret i32 %add
> > +}
> > +
> > +define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: udiv w0, w0, w1
> > +; CHECK: ret
> > +  %udiv = udiv i32 %a, %b
> > +  ret i32 %udiv
> > +}
> > +
> > +define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: udiv x0, x0, x1
> > +; CHECK: ret
> > +  %udiv = udiv i64 %a, %b
> > +  ret i64 %udiv
> > +}
> > +
> > +define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: sdiv w0, w0, w1
> > +; CHECK: ret
> > +  %sdiv = sdiv i32 %a, %b
> > +  ret i32 %sdiv
> > +}
> > +
> > +define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: sdiv x0, x0, x1
> > +; CHECK: ret
> > +  %sdiv = sdiv i64 %a, %b
> > +  ret i64 %sdiv
> > +}
> > +
> > +define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t6:
> > +; CHECK: lslv w0, w0, w1
> > +; CHECK: ret
> > +  %shl = shl i32 %a, %b
> > +  ret i32 %shl
> > +}
> > +
> > +define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t7:
> > +; CHECK: lslv x0, x0, x1
> > +; CHECK: ret
> > +  %shl = shl i64 %a, %b
> > +  ret i64 %shl
> > +}
> > +
> > +define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t8:
> > +; CHECK: lsrv w0, w0, w1
> > +; CHECK: ret
> > +  %lshr = lshr i32 %a, %b
> > +  ret i32 %lshr
> > +}
> > +
> > +define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t9:
> > +; CHECK: lsrv x0, x0, x1
> > +; CHECK: ret
> > +  %lshr = lshr i64 %a, %b
> > +  ret i64 %lshr
> > +}
> > +
> > +define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t10:
> > +; CHECK: asrv w0, w0, w1
> > +; CHECK: ret
> > +  %ashr = ashr i32 %a, %b
> > +  ret i32 %ashr
> > +}
> > +
> > +define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t11:
> > +; CHECK: asrv x0, x0, x1
> > +; CHECK: ret
> > +  %ashr = ashr i64 %a, %b
> > +  ret i64 %ashr
> > +}
> > +
> > +define i32 @t12(i16 %a, i32 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t12:
> > +; CHECK: add   w0, w1, w0, sxth
> > +; CHECK: ret
> > +  %c = sext i16 %a to i32
> > +  %e = add i32 %x, %c
> > +  ret i32 %e
> > +}
> > +
> > +define i32 @t13(i16 %a, i32 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t13:
> > +; CHECK: add   w0, w1, w0, sxth #2
> > +; CHECK: ret
> > +  %c = sext i16 %a to i32
> > +  %d = shl i32 %c, 2
> > +  %e = add i32 %x, %d
> > +  ret i32 %e
> > +}
> > +
> > +define i64 @t14(i16 %a, i64 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t14:
> > +; CHECK: add   x0, x1, w0, uxth #3
> > +; CHECK: ret
> > +  %c = zext i16 %a to i64
> > +  %d = shl i64 %c, 3
> > +  %e = add i64 %x, %d
> > +  ret i64 %e
> > +}
> > +
> > +; rdar://9160598
> > +define i64 @t15(i64 %a, i64 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t15:
> > +; CHECK: add x0, x1, w0, uxtw
> > +; CHECK: ret
> > +  %b = and i64 %a, 4294967295
> > +  %c = add i64 %x, %b
> > +  ret i64 %c
> > +}
> > +
> > +define i64 @t16(i64 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t16:
> > +; CHECK: lsl x0, x0, #1
> > +; CHECK: ret
> > +  %a = shl i64 %x, 1
> > +  ret i64 %a
> > +}
> > +
> > +; rdar://9166974
> > +define i64 @t17(i16 %a, i64 %x) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t17:
> > +; CHECK: sxth [[REG:x[0-9]+]], x0
> > +; CHECK: sub x0, xzr, [[REG]], lsl #32
> > +; CHECK: ret
> > +  %tmp16 = sext i16 %a to i64
> > +  %tmp17 = mul i64 %tmp16, -4294967296
> > +  ret i64 %tmp17
> > +}
> > +
> > +define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t18:
> > +; CHECK: sdiv w0, w0, w1
> > +; CHECK: ret
> > +  %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b)
> > +  ret i32 %sdiv
> > +}
> > +
> > +define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t19:
> > +; CHECK: sdiv x0, x0, x1
> > +; CHECK: ret
> > +  %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b)
> > +  ret i64 %sdiv
> > +}
> > +
> > +define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t20:
> > +; CHECK: udiv w0, w0, w1
> > +; CHECK: ret
> > +  %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b)
> > +  ret i32 %udiv
> > +}
> > +
> > +define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t21:
> > +; CHECK: udiv x0, x0, x1
> > +; CHECK: ret
> > +  %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b)
> > +  ret i64 %udiv
> > +}
> > +
> > +declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone
> > +declare i64 @llvm.arm64.sdiv.i64(i64, i64) nounwind readnone
> > +declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone
> > +declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone
> > +
> > +; 32-bit not.
> > +define i32 @inv_32(i32 %x) nounwind ssp {
> > +entry:
> > +; CHECK: inv_32
> > +; CHECK: mvn w0, w0
> > +; CHECK: ret
> > +  %inv = xor i32 %x, -1
> > +  ret i32 %inv
> > +}
> > +
> > +; 64-bit not.
> > +define i64 @inv_64(i64 %x) nounwind ssp {
> > +entry:
> > +; CHECK: inv_64
> > +; CHECK: mvn x0, x0
> > +; CHECK: ret
> > +  %inv = xor i64 %x, -1
> > +  ret i64 %inv
> > +}
> > +
> > +; Multiplying by a power of two plus or minus one is better done via a shift
> > +; and an add/sub rather than the madd/msub instructions. The latter take 4+
> > +; cycles, while the former take two in total (for the two-instruction
> > +; shift-and-subtract sequence).
> > +define i32 @f0(i32 %a) nounwind readnone ssp {
> > +; CHECK-LABEL: f0:
> > +; CHECK-NEXT: add w0, w0, w0, lsl #3
> > +; CHECK-NEXT: ret
> > +  %res = mul i32 %a, 9
> > +  ret i32 %res
> > +}
> > +
> > +define i64 @f1(i64 %a) nounwind readnone ssp {
> > +; CHECK-LABEL: f1:
> > +; CHECK-NEXT: lsl x8, x0, #4
> > +; CHECK-NEXT: sub x0, x8, x0
> > +; CHECK-NEXT: ret
> > +  %res = mul i64 %a, 15
> > +  ret i64 %res
> > +}
> > +
> > +define i32 @f2(i32 %a) nounwind readnone ssp {
> > +; CHECK-LABEL: f2:
> > +; CHECK-NEXT: lsl w8, w0, #3
> > +; CHECK-NEXT: sub w0, w8, w0
> > +; CHECK-NEXT: ret
> > +  %res = mul nsw i32 %a, 7
> > +  ret i32 %res
> > +}
> > +
> > +define i64 @f3(i64 %a) nounwind readnone ssp {
> > +; CHECK-LABEL: f3:
> > +; CHECK-NEXT: add x0, x0, x0, lsl #4
> > +; CHECK-NEXT: ret
> > +  %res = mul nsw i64 %a, 17
> > +  ret i64 %res
> > +}
> >
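The expansions @f0-@f3 above expect are just the obvious shift-plus-accumulate forms; writing them out by hand for the i64 cases (my own equivalents, not from the patch) makes the expected instruction counts explicit:

define i64 @mul9_expanded(i64 %a) {
  %shl = shl i64 %a, 3          ; a << 3
  %res = add i64 %shl, %a       ; (a << 3) + a == a * 9   -> add x0, x0, x0, lsl #3
  ret i64 %res
}

define i64 @mul15_expanded(i64 %a) {
  %shl = shl i64 %a, 4          ; a << 4
  %res = sub i64 %shl, %a       ; (a << 4) - a == a * 15  -> lsl x8, x0, #4 ; sub x0, x8, x0
  ret i64 %res
}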
> > Added: llvm/trunk/test/CodeGen/ARM64/atomic-128.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/atomic-128.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/atomic-128.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/atomic-128.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,213 @@
> > +; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu
> -verify-machineinstrs | FileCheck %s
> > +
> > + at var = global i128 0
> > +
> > +define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval)
> {
> > +; CHECK-LABEL: val_compare_and_swap:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp   [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[RESULTLO]], x2
> > +; CHECK: sbc    xzr, [[RESULTHI]], x3
> > +; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
> > +; CHECK: stxp   [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +; CHECK: [[LABEL2]]:
> > +  %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
> > +  ret i128 %val
> > +}
> > +
> > +define void @fetch_and_nand(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_nand:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: bic    [[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]]
> > +; CHECK: bic    [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]]
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw nand i128* %p, i128 %bits release
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_or(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_or:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: orr    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
> > +; CHECK: orr    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw or i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_add(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_add:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: adds   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
> > +; CHECK: adc    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw add i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_sub(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_sub:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: subs   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
> > +; CHECK: sbc    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_min(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_min:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[DEST_REGLO]], x2
> > +; CHECK: sbc    xzr, [[DEST_REGHI]], x3
> > +; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt
> > +; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw min i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_max(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_max:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[DEST_REGLO]], x2
> > +; CHECK: sbc    xzr, [[DEST_REGHI]], x3
> > +; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt
> > +; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw max i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_umin(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_umin:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[DEST_REGLO]], x2
> > +; CHECK: sbc    xzr, [[DEST_REGHI]], x3
> > +; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc
> > +; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define void @fetch_and_umax(i128* %p, i128 %bits) {
> > +; CHECK-LABEL: fetch_and_umax:
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[DEST_REGLO]], x2
> > +; CHECK: sbc    xzr, [[DEST_REGHI]], x3
> > +; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi
> > +; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi
> > +; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]],
> [[SCRATCH_REGHI]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
> > +
> > +; CHECK: str    [[DEST_REGHI]]
> > +; CHECK: str    [[DEST_REGLO]]
> > +  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
> > +  store i128 %val, i128* @var, align 16
> > +  ret void
> > +}
> > +
> > +define i128 @atomic_load_seq_cst(i128* %p) {
> > +; CHECK-LABEL: atomic_load_seq_cst:
> > +; CHECK-NOT: dmb
> > +; CHECK: ldaxp
> > +; CHECK-NOT: dmb
> > +   %r = load atomic i128* %p seq_cst, align 16
> > +   ret i128 %r
> > +}
> > +
> > +define i128 @atomic_load_relaxed(i128* %p) {
> > +; CHECK-LABEL: atomic_load_relaxed:
> > +; CHECK-NOT: dmb
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
> > +; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr
> > +; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr
> > +; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0]
> > +; CHECK: cbnz [[SUCCESS]], [[LABEL]]
> > +; CHECK-NOT: dmb
> > +   %r = load atomic i128* %p monotonic, align 16
> > +   ret i128 %r
> > +}
> > +
> > +
> > +define void @atomic_store_seq_cst(i128 %in, i128* %p) {
> > +; CHECK-LABEL: atomic_store_seq_cst:
> > +; CHECK-NOT: dmb
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxp xzr, xzr, [x2]
> > +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
> > +; CHECK: cbnz [[SUCCESS]], [[LABEL]]
> > +; CHECK-NOT: dmb
> > +   store atomic i128 %in, i128* %p seq_cst, align 16
> > +   ret void
> > +}
> > +
> > +define void @atomic_store_release(i128 %in, i128* %p) {
> > +; CHECK-LABEL: atomic_store_release:
> > +; CHECK-NOT: dmb
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxp xzr, xzr, [x2]
> > +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
> > +; CHECK: cbnz [[SUCCESS]], [[LABEL]]
> > +; CHECK-NOT: dmb
> > +   store atomic i128 %in, i128* %p release, align 16
> > +   ret void
> > +}
> > +
> > +define void @atomic_store_relaxed(i128 %in, i128* %p) {
> > +; CHECK-LABEL: atomic_store_relaxed:
> > +; CHECK-NOT: dmb
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxp xzr, xzr, [x2]
> > +; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
> > +; CHECK: cbnz [[SUCCESS]], [[LABEL]]
> > +; CHECK-NOT: dmb
> > +   store atomic i128 %in, i128* %p unordered, align 16
> > +   ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/atomic.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/atomic.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/atomic.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/atomic.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,343 @@
> > +; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
> > +
> > +define i32 @val_compare_and_swap(i32* %p) {
> > +; CHECK-LABEL: val_compare_and_swap:
> > +; CHECK: orr    [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
> > +; CHECK: orr    [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxr   [[RESULT:w[0-9]+]], [x0]
> > +; CHECK: cmp    [[RESULT]], [[OLDVAL_REG]]
> > +; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
> > +; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: [[LABEL2]]:
> > +  %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
> > +  ret i32 %val
> > +}
> > +
> > +define i64 @val_compare_and_swap_64(i64* %p) {
> > +; CHECK-LABEL: val_compare_and_swap_64:
> > +; CHECK: orr    [[NEWVAL_REG:x[0-9]+]], xzr, #0x4
> > +; CHECK: orr    [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxr   [[RESULT:x[0-9]+]], [x0]
> > +; CHECK: cmp    [[RESULT]], [[OLDVAL_REG]]
> > +; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
> > +; CHECK-NOT: stxr [[NEWVAL_REG]], [[NEWVAL_REG]]
> > +; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: [[LABEL2]]:
> > +  %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
> > +  ret i64 %val
> > +}
> > +
> > +define i32 @fetch_and_nand(i32* %p) {
> > +; CHECK-LABEL: fetch_and_nand:
> > +; CHECK: orr    [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxr   w[[DEST_REG:[0-9]+]], [x0]
> > +; CHECK: bic    [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]]
> > +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
> > +; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: mov    x0, x[[DEST_REG]]
> > +  %val = atomicrmw nand i32* %p, i32 7 release
> > +  ret i32 %val
> > +}
> > +
> > +define i64 @fetch_and_nand_64(i64* %p) {
> > +; CHECK-LABEL: fetch_and_nand_64:
> > +; CHECK: orr    [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxr   [[DEST_REG:x[0-9]+]], [x0]
> > +; CHECK: bic    [[SCRATCH2_REG:x[0-9]+]], [[OLDVAL_REG]], [[DEST_REG]]
> > +; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: mov    x0, [[DEST_REG]]
> > +  %val = atomicrmw nand i64* %p, i64 7 acq_rel
> > +  ret i64 %val
> > +}
> > +
> > +define i32 @fetch_and_or(i32* %p) {
> > +; CHECK-LABEL: fetch_and_or:
> > +; CHECK: movz   [[OLDVAL_REG:w[0-9]+]], #5
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldaxr   w[[DEST_REG:[0-9]+]], [x0]
> > +; CHECK: orr    [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
> > +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
> > +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: mov    x0, x[[DEST_REG]]
> > +  %val = atomicrmw or i32* %p, i32 5 seq_cst
> > +  ret i32 %val
> > +}
> > +
> > +define i64 @fetch_and_or_64(i64* %p) {
> > +; CHECK-LABEL: fetch_and_or_64:
> > +; CHECK: orr    [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
> > +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
> > +; CHECK: ldxr   [[DEST_REG:x[0-9]+]], [x0]
> > +; CHECK: orr    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], [[OLDVAL_REG]]
> > +; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
> > +; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
> > +; CHECK: mov    x0, [[DEST_REG]]
> > +  %val = atomicrmw or i64* %p, i64 7 monotonic
> > +  ret i64 %val
> > +}
> > +
> > +define void @acquire_fence() {
> > +   fence acquire
> > +   ret void
> > +   ; CHECK-LABEL: acquire_fence:
> > +   ; CHECK: dmb ishld
> > +}
> > +
> > +define void @release_fence() {
> > +   fence release
> > +   ret void
> > +   ; CHECK-LABEL: release_fence:
> > +   ; CHECK: dmb ish{{$}}
> > +}
> > +
> > +define void @seq_cst_fence() {
> > +   fence seq_cst
> > +   ret void
> > +   ; CHECK-LABEL: seq_cst_fence:
> > +   ; CHECK: dmb ish{{$}}
> > +}
> > +
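For reference, the C11 fences that correspond to the IR fences being tested (my example, not from the patch):

    #include <stdatomic.h>

    /* Hedged sketch: expected ARM64 lowering noted per the CHECK lines above. */
    void fences(void) {
        atomic_thread_fence(memory_order_acquire);  /* dmb ishld */
        atomic_thread_fence(memory_order_release);  /* dmb ish   */
        atomic_thread_fence(memory_order_seq_cst);  /* dmb ish   */
    }
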
> > +define i32 @atomic_load(i32* %p) {
> > +   %r = load atomic i32* %p seq_cst, align 4
> > +   ret i32 %r
> > +   ; CHECK-LABEL: atomic_load:
> > +   ; CHECK: ldar
> > +}
> > +
> > +define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
> > +; CHECK-LABEL: atomic_load_relaxed_8:
> > +  %ptr_unsigned = getelementptr i8* %p, i32 4095
> > +  %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
> > +; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
> > +
> > +  %ptr_regoff = getelementptr i8* %p, i32 %off32
> > +  %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
> > +  %tot1 = add i8 %val_unsigned, %val_regoff
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: ldrb {{w[0-9]+}}, [x0, x1, sxtw]
> > +
> > +  %ptr_unscaled = getelementptr i8* %p, i32 -256
> > +  %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
> > +  %tot2 = add i8 %tot1, %val_unscaled
> > +; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD
> imm)
> > +  %val_random = load atomic i8* %ptr_random unordered, align 1
> > +  %tot3 = add i8 %tot2, %val_random
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret i8 %tot3
> > +}
> > +
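The four loads in this test each exercise one addressing mode; a C sketch of the same shapes (names and constants are mine, not from the patch):

    #include <stdatomic.h>

    /* Hedged sketch: monotonic/unordered atomic loads are selected like plain
     * loads, so the usual forms apply: an unsigned 12-bit immediate (ldrb), a
     * register offset, an unscaled negative 9-bit immediate (ldurb), and an
     * offset too large for either form, which needs a separate add. */
    unsigned char load_forms(_Atomic unsigned char *p, int off) {
        unsigned char a = atomic_load_explicit(p + 4095, memory_order_relaxed);
        unsigned char b = atomic_load_explicit(p + off, memory_order_relaxed);
        unsigned char c = atomic_load_explicit(p - 256, memory_order_relaxed);
        unsigned char d = atomic_load_explicit(p + 0x123000, memory_order_relaxed);
        return (unsigned char)(a + b + c + d);
    }
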
> > +define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
> > +; CHECK-LABEL: atomic_load_relaxed_16:
> > +  %ptr_unsigned = getelementptr i16* %p, i32 4095
> > +  %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
> > +; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
> > +
> > +  %ptr_regoff = getelementptr i16* %p, i32 %off32
> > +  %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
> > +  %tot1 = add i16 %val_unsigned, %val_regoff
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: ldrh {{w[0-9]+}}, [x0, x1, sxtw #1]
> > +
> > +  %ptr_unscaled = getelementptr i16* %p, i32 -128
> > +  %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
> > +  %tot2 = add i16 %tot1, %val_unscaled
> > +; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e.
> ADD imm)
> > +  %val_random = load atomic i16* %ptr_random unordered, align 2
> > +  %tot3 = add i16 %tot2, %val_random
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret i16 %tot3
> > +}
> > +
> > +define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
> > +; CHECK-LABEL: atomic_load_relaxed_32:
> > +  %ptr_unsigned = getelementptr i32* %p, i32 4095
> > +  %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
> > +; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
> > +
> > +  %ptr_regoff = getelementptr i32* %p, i32 %off32
> > +  %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
> > +  %tot1 = add i32 %val_unsigned, %val_regoff
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: ldr {{w[0-9]+}}, [x0, x1, sxtw #2]
> > +
> > +  %ptr_unscaled = getelementptr i32* %p, i32 -64
> > +  %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
> > +  %tot2 = add i32 %tot1, %val_unscaled
> > +; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e.
> ADD imm)
> > +  %val_random = load atomic i32* %ptr_random unordered, align 4
> > +  %tot3 = add i32 %tot2, %val_random
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret i32 %tot3
> > +}
> > +
> > +define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
> > +; CHECK-LABEL: atomic_load_relaxed_64:
> > +  %ptr_unsigned = getelementptr i64* %p, i32 4095
> > +  %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
> > +; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
> > +
> > +  %ptr_regoff = getelementptr i64* %p, i32 %off32
> > +  %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
> > +  %tot1 = add i64 %val_unsigned, %val_regoff
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: ldr {{x[0-9]+}}, [x0, x1, sxtw #3]
> > +
> > +  %ptr_unscaled = getelementptr i64* %p, i32 -32
> > +  %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
> > +  %tot2 = add i64 %tot1, %val_unscaled
> > +; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e.
> ADD imm)
> > +  %val_random = load atomic i64* %ptr_random unordered, align 8
> > +  %tot3 = add i64 %tot2, %val_random
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret i64 %tot3
> > +}
> > +
> > +
> > +define void @atomic_store(i32* %p) {
> > +   store atomic i32 4, i32* %p seq_cst, align 4
> > +   ret void
> > +   ; CHECK-LABEL: atomic_store:
> > +   ; CHECK: stlr
> > +}
> > +
> > +define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
> > +; CHECK-LABEL: atomic_store_relaxed_8:
> > +  %ptr_unsigned = getelementptr i8* %p, i32 4095
> > +  store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
> > +; CHECK: strb {{w[0-9]+}}, [x0, #4095]
> > +
> > +  %ptr_regoff = getelementptr i8* %p, i32 %off32
> > +  store atomic i8 %val, i8* %ptr_regoff unordered, align 1
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: strb {{w[0-9]+}}, [x0, x1, sxtw]
> > +
> > +  %ptr_unscaled = getelementptr i8* %p, i32 -256
> > +  store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
> > +; CHECK: sturb {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD
> imm)
> > +  store atomic i8 %val, i8* %ptr_random unordered, align 1
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret void
> > +}
> > +
> > +define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
> > +; CHECK-LABEL: atomic_store_relaxed_16:
> > +  %ptr_unsigned = getelementptr i16* %p, i32 4095
> > +  store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
> > +; CHECK: strh {{w[0-9]+}}, [x0, #8190]
> > +
> > +  %ptr_regoff = getelementptr i16* %p, i32 %off32
> > +  store atomic i16 %val, i16* %ptr_regoff unordered, align 2
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: strh {{w[0-9]+}}, [x0, x1, sxtw #1]
> > +
> > +  %ptr_unscaled = getelementptr i16* %p, i32 -128
> > +  store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
> > +; CHECK: sturh {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e.
> ADD imm)
> > +  store atomic i16 %val, i16* %ptr_random unordered, align 2
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret void
> > +}
> > +
> > +define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
> > +; CHECK-LABEL: atomic_store_relaxed_32:
> > +  %ptr_unsigned = getelementptr i32* %p, i32 4095
> > +  store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
> > +; CHECK: str {{w[0-9]+}}, [x0, #16380]
> > +
> > +  %ptr_regoff = getelementptr i32* %p, i32 %off32
> > +  store atomic i32 %val, i32* %ptr_regoff unordered, align 4
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: str {{w[0-9]+}}, [x0, x1, sxtw #2]
> > +
> > +  %ptr_unscaled = getelementptr i32* %p, i32 -64
> > +  store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
> > +; CHECK: stur {{w[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e.
> ADD imm)
> > +  store atomic i32 %val, i32* %ptr_random unordered, align 4
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: str {{w[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret void
> > +}
> > +
> > +define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
> > +; CHECK-LABEL: atomic_store_relaxed_64:
> > +  %ptr_unsigned = getelementptr i64* %p, i32 4095
> > +  store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
> > +; CHECK: str {{x[0-9]+}}, [x0, #32760]
> > +
> > +  %ptr_regoff = getelementptr i64* %p, i32 %off32
> > +  store atomic i64 %val, i64* %ptr_regoff unordered, align 8
> > +  ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an
> x-reg.
> > +; CHECK: str {{x[0-9]+}}, [x0, x1, sxtw #3]
> > +
> > +  %ptr_unscaled = getelementptr i64* %p, i32 -32
> > +  store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
> > +; CHECK: stur {{x[0-9]+}}, [x0, #-256]
> > +
> > +  %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e.
> ADD imm)
> > +  store atomic i64 %val, i64* %ptr_random unordered, align 8
> > +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
> > +; CHECK: str {{x[0-9]+}}, [x[[ADDR]]]
> > +
> > +  ret void
> > +}
> > +
> > +; rdar://11531169
> > +; rdar://11531308
> > +
> > +%"class.X::Atomic" = type { %struct.x_atomic_t }
> > +%struct.x_atomic_t = type { i32 }
> > +
> > + at counter = external hidden global %"class.X::Atomic", align 4
> > +
> > +define i32 @next_id() nounwind optsize ssp align 2 {
> > +entry:
> > +  %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"*
> @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
> > +  %add.i = add i32 %0, 1
> > +  %tobool = icmp eq i32 %add.i, 0
> > +  br i1 %tobool, label %if.else, label %return
> > +
> > +if.else:                                          ; preds = %entry
> > +  %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"*
> @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
> > +  %add.i2 = add i32 %1, 1
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.else,
> %entry
> > +  %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ]
> > +  ret i32 %retval.0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,14 @@
> > +; RUN: llc -march=arm64 < %s
> > +
> > +
> > +; Make sure large offsets aren't mistaken for valid immediate offsets.
> > +; <rdar://problem/13190511>
> > +define void @f(i32* nocapture %p) {
> > +entry:
> > +  %a = ptrtoint i32* %p to i64
> > +  %ao = add i64 %a, 25769803792
> > +  %b = inttoptr i64 %ao to i32*
> > +  store volatile i32 0, i32* %b, align 4
> > +  store volatile i32 0, i32* %b, align 4
> > +  ret void
> > +}
> >
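The offset here is 25769803792 (0x600000010), well beyond any load/store immediate encoding, so the address has to be computed in a register. A hedged C sketch of the same situation (my reconstruction, not the original radar case):

    #include <stdint.h>

    /* Sketch: the large offset cannot be folded into the str immediate. */
    void f(volatile int *p) {
        volatile int *q = (volatile int *)((uintptr_t)p + 25769803792ULL);
        *q = 0;
        *q = 0;
    }
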
> > Added: llvm/trunk/test/CodeGen/ARM64/big-stack.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-stack.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/big-stack.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/big-stack.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +; RUN: llc < %s | FileCheck %s
> > +target triple = "arm64-apple-macosx10"
> > +
> > +; Check that big stacks are generated correctly.
> > +; Currently, this is done by a sequence of sub instructions, each of which
> > +; can encode a 12-bit immediate, optionally shifted left by 12; i.e.,
> > +; 16773120 (0xfff << 12) is the biggest value per instruction, and
> > +; 2 * 16773120 + 8192 = 33554432 covers the alloca below.
> > +; <rdar://12513931>
> > +; CHECK-LABEL: foo:
> > +; CHECK: sub sp, sp, #16773120
> > +; CHECK: sub sp, sp, #16773120
> > +; CHECK: sub sp, sp, #8192
> > +define void @foo() nounwind ssp {
> > +entry:
> > +  %buffer = alloca [33554432 x i8], align 1
> > +  %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0,
> i64 0
> > +  call void @doit(i8* %arraydecay) nounwind
> > +  ret void
> > +}
> > +
> > +declare void @doit(i8*)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,406 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +%struct.X = type { i8, i8, [2 x i8] }
> > +%struct.Y = type { i32, i8 }
> > +%struct.Z = type { i8, i8, [2 x i8], i16 }
> > +%struct.A = type { i64, i8 }
> > +
> > +define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y)
> nounwind optsize ssp {
> > +; CHECK-LABEL: foo:
> > +; CHECK: ubfm
> > +; CHECK-NOT: and
> > +; CHECK: ret
> > +
> > +  %tmp = bitcast %struct.X* %x to i32*
> > +  %tmp1 = load i32* %tmp, align 4
> > +  %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1
> > +  %bf.clear = lshr i32 %tmp1, 3
> > +  %bf.clear.lobit = and i32 %bf.clear, 1
> > +  %frombool = trunc i32 %bf.clear.lobit to i8
> > +  store i8 %frombool, i8* %b, align 1
> > +  ret void
> > +}
> > +
> > +define i32 @baz(i64 %cav1.coerce) nounwind {
> > +; CHECK-LABEL: baz:
> > +; CHECK: sbfm  w0, w0, #0, #3
> > +  %tmp = trunc i64 %cav1.coerce to i32
> > +  %tmp1 = shl i32 %tmp, 28
> > +  %bf.val.sext = ashr exact i32 %tmp1, 28
> > +  ret i32 %bf.val.sext
> > +}
> > +
> > +define i32 @bar(i64 %cav1.coerce) nounwind {
> > +; CHECK-LABEL: bar:
> > +; CHECK: sbfm  w0, w0, #4, #9
> > +  %tmp = trunc i64 %cav1.coerce to i32
> > +  %cav1.sroa.0.1.insert = shl i32 %tmp, 22
> > +  %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
> > +  ret i32 %tmp1
> > +}
> > +
> > +define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y)
> nounwind optsize ssp {
> > +; CHECK-LABEL: fct1:
> > +; CHECK: ubfm
> > +; CHECK-NOT: and
> > +; CHECK: ret
> > +
> > +  %tmp = bitcast %struct.Z* %x to i64*
> > +  %tmp1 = load i64* %tmp, align 4
> > +  %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0
> > +  %bf.clear = lshr i64 %tmp1, 3
> > +  %bf.clear.lobit = and i64 %bf.clear, 1
> > +  store i64 %bf.clear.lobit, i64* %b, align 8
> > +  ret void
> > +}
> > +
> > +define i64 @fct2(i64 %cav1.coerce) nounwind {
> > +; CHECK-LABEL: fct2:
> > +; CHECK: sbfm  x0, x0, #0, #35
> > +  %tmp = shl i64 %cav1.coerce, 28
> > +  %bf.val.sext = ashr exact i64 %tmp, 28
> > +  ret i64 %bf.val.sext
> > +}
> > +
> > +define i64 @fct3(i64 %cav1.coerce) nounwind {
> > +; CHECK-LABEL: fct3:
> > +; CHECK: sbfm  x0, x0, #4, #41
> > +  %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
> > +  %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
> > +  ret i64 %tmp1
> > +}
> > +
> > +define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct4:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #16, #39
> > +; CHECK-NEXT: str [[REG1]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -16777216
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 16777215
> > +  %or = or i64 %and, %and1
> > +  store i64 %or, i64* %y, align 8
> > +  ret void
> > +}
> > +
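A hedged C sketch of the idiom fct4 (and, in 32-bit form, fct5) is testing, with names of my own choosing: keep the upper bits of *y and replace its low 24 bits with bits [39:16] of x, which DAG combining should match to a single bfm.

    #include <stdint.h>

    /* Sketch: the and/shift/or sequence below mirrors the IR in fct4 and is
     * expected to become one bfm (bitfield move). */
    void bfi24(uint64_t *y, uint64_t x) {
        *y = (*y & ~0xFFFFFFULL) | ((x >> 16) & 0xFFFFFFULL);
    }
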
> > +define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct5:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
> > +; CHECK-NEXT: str [[REG1]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -8
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 7
> > +  %or = or i32 %and, %and1
> > +  store i32 %or, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can still catch bfm instruction when we drop some low bits
> > +define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct6:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -8
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 7
> > +  %or = or i32 %and, %and1
> > +  %shr1 = lshr i32 %or, 2
> > +  store i32 %shr1, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct7:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -8
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 7
> > +  %or = or i32 %and, %and1
> > +  %shl = shl i32 %or, 2
> > +  store i32 %shl, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +
> > +; Check if we can still catch bfm instruction when we drop some low bits
> > +; (i64 version)
> > +define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct8:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -8
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 7
> > +  %or = or i64 %and, %and1
> > +  %shr1 = lshr i64 %or, 2
> > +  store i64 %shr1, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; (i64 version)
> > +define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct9:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -8
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 7
> > +  %or = or i64 %and, %and1
> > +  %shl = shl i64 %or, 2
> > +  store i64 %shl, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
> > +; (i32 version)
> > +define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct10:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #0, #2
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -8
> > +  %and1 = and i32 %x, 7
> > +  %or = or i32 %and, %and1
> > +  %shl = shl i32 %or, 2
> > +  store i32 %shl, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
> > +; (i64 version)
> > +define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct11:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #0, #2
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -8
> > +  %and1 = and i64 %x, 7
> > +  %or = or i64 %and, %and1
> > +  %shl = shl i64 %or, 2
> > +  store i64 %shl, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align
> 2 {
> > +; CHECK-LABEL: fct12bis:
> > +; CHECK-NOT: and
> > +; CHECK: ubfm w0, w0, #11, #11
> > +  %and.i.i = and i32 %tmp2, 2048
> > +  %tobool.i.i = icmp ne i32 %and.i.i, 0
> > +  ret i1 %tobool.i.i
> > +}
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits
> > +define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct12:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: ubfm [[REG2:w[0-9]+]], [[REG1]], #2, #29
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -8
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 7
> > +  %or = or i32 %and, %and1
> > +  %shl = shl i32 %or, 2
> > +  %shr2 = lshr i32 %shl, 4
> > +  store i32 %shr2, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits
> > +; (i64 version)
> > +define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct13:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: ubfm [[REG2:x[0-9]+]], [[REG1]], #2, #61
> > +; CHECK-NEXT: str [[REG2]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -8
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 7
> > +  %or = or i64 %and, %and1
> > +  %shl = shl i64 %or, 2
> > +  %shr2 = lshr i64 %shl, 4
> > +  store i64 %shr2, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits
> > +define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct14:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], w1, #16, #23
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
> > +; CHECK-NEXT: bfm [[REG2]], w2, #5, #7
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
> > +; CHECK-NEXT: str [[REG3]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, -256
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 255
> > +  %or = or i32 %and, %and1
> > +  %shl = lshr i32 %or, 4
> > +  %and2 = and i32 %shl, -8
> > +  %shr1 = lshr i32 %x1, 5
> > +  %and3 = and i32 %shr1, 7
> > +  %or1 = or i32 %and2, %and3
> > +  %shl1 = shl i32 %or1, 2
> > +  store i32 %shl1, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits
> > +; (i64 version)
> > +define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct15:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; CHECK-NEXT: bfm [[REG1]], x1, #16, #23
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
> > +; CHECK-NEXT: bfm [[REG2]], x2, #5, #7
> > +; lsl is an alias of ubfm
> > +; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
> > +; CHECK-NEXT: str [[REG3]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, -256
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 255
> > +  %or = or i64 %and, %and1
> > +  %shl = lshr i64 %or, 4
> > +  %and2 = and i64 %shl, -8
> > +  %shr1 = lshr i64 %x1, 5
> > +  %and3 = and i64 %shr1, 7
> > +  %or1 = or i64 %and2, %and3
> > +  %shl1 = shl i64 %or1, 2
> > +  store i64 %shl1, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits and a masking operation has to be kept
> > +define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct16:
> > +; CHECK: ldr [[REG1:w[0-9]+]],
> > +; Create the constant
> > +; CHECK: movz [[REGCST:w[0-9]+]], #26, lsl #16
> > +; CHECK: movk [[REGCST]], #33120
> > +; Do the masking
> > +; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
> > +; CHECK-NEXT: bfm [[REG2]], w1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: ubfm [[REG3:w[0-9]+]], [[REG2]], #2, #29
> > +; CHECK-NEXT: str [[REG3]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i32* %y, align 8
> > +  %and = and i32 %0, 1737056
> > +  %shr = lshr i32 %x, 16
> > +  %and1 = and i32 %shr, 7
> > +  %or = or i32 %and, %and1
> > +  %shl = shl i32 %or, 2
> > +  %shr2 = lshr i32 %shl, 4
> > +  store i32 %shr2, i32* %y, align 8
> > +  ret void
> > +}
> > +
> > +
> > +; Check if we can still catch bfm instruction when we drop some high
> bits
> > +; and some low bits and a masking operation has to be kept
> > +; (i64 version)
> > +define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize
> inlinehint ssp {
> > +entry:
> > +; CHECK-LABEL: fct17:
> > +; CHECK: ldr [[REG1:x[0-9]+]],
> > +; Create the constant
> > +; CHECK: movz [[REGCST:x[0-9]+]], #26, lsl #16
> > +; CHECK: movk [[REGCST]], #33120
> > +; Do the masking
> > +; CHECK: and [[REG2:x[0-9]+]], [[REG1]], [[REGCST]]
> > +; CHECK-NEXT: bfm [[REG2]], x1, #16, #18
> > +; lsr is an alias of ubfm
> > +; CHECK-NEXT: ubfm [[REG3:x[0-9]+]], [[REG2]], #2, #61
> > +; CHECK-NEXT: str [[REG3]],
> > +; CHECK-NEXT: ret
> > +  %0 = load i64* %y, align 8
> > +  %and = and i64 %0, 1737056
> > +  %shr = lshr i64 %x, 16
> > +  %and1 = and i64 %shr, 7
> > +  %or = or i64 %and, %and1
> > +  %shl = shl i64 %or, 2
> > +  %shr2 = lshr i64 %shl, 4
> > +  store i64 %shr2, i64* %y, align 8
> > +  ret void
> > +}
> > +
> > +define i64 @fct18(i32 %xor72) nounwind ssp {
> > +; CHECK-LABEL: fct18:
> > +; CHECK: ubfm x0, x0, #9, #16
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %result = and i64 %conv82, 255
> > +  ret i64 %result
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/blockaddress.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/blockaddress.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/blockaddress.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/blockaddress.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,30 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
> > +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
> --check-prefix=CHECK-LINUX
> > +; RUN: llc < %s -mtriple=arm64-linux-gnu -code-model=large| FileCheck
> %s --check-prefix=CHECK-LARGE
> > +
> > +; rdar://9188695
> > +
> > +define i64 @t() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t:
> > +; CHECK: adrp [[REG:x[0-9]+]], Ltmp1 at PAGE
> > +; CHECK: add {{x[0-9]+}}, [[REG]], Ltmp1 at PAGEOFF
> > +
> > +; CHECK-LINUX-LABEL: t:
> > +; CHECK-LINUX: adrp [[REG:x[0-9]+]], .Ltmp1
> > +; CHECK-LINUX: add {{x[0-9]+}}, [[REG]], :lo12:.Ltmp1
> > +
> > +; CHECK-LARGE-LABEL: t:
> > +; CHECK-LARGE: movz [[ADDR_REG:x[0-9]+]],
> #:abs_g3:[[DEST_LBL:.Ltmp[0-9]+]]
> > +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g2_nc:[[DEST_LBL]]
> > +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g1_nc:[[DEST_LBL]]
> > +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g0_nc:[[DEST_LBL]]
> > +
> > +  %recover = alloca i64, align 8
> > +  store volatile i64 ptrtoint (i8* blockaddress(@t, %mylabel) to i64),
> i64* %recover, align 8
> > +  br label %mylabel
> > +
> > +mylabel:
> > +  %tmp = load volatile i64* %recover, align 8
> > +  ret i64 %tmp
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/build-vector.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/build-vector.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/build-vector.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/build-vector.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,35 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +; Check that a vector built up with only one non-zero lane is initialized
> > +; intelligently.
> > +define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind {
> > +; CHECK-LABEL: one_lane:
> > +; CHECK: dup.16b v[[REG:[0-9]+]], wzr
> > +; CHECK-NEXT: ins.b v[[REG]][0], w1
> > +; v and q are aliases, and str is preferred over st.16b when possible
> > +; rdar://11246289
> > +; CHECK: str q[[REG]], [x0]
> > +; CHECK: ret
> > +  %conv = trunc i32 %skip0 to i8
> > +  %vset_lane = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8
> 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8
> %conv, i32 0
> > +  %tmp = bitcast i32* %out_int to <4 x i32>*
> > +  %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32>
> > +  store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16
> > +  ret void
> > +}
> > +
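An intrinsics-level sketch of the same pattern (mine, not from the patch): zero the vector, insert one lane, store 16 bytes, which should come out as dup.16b of wzr, one ins.b, and a plain str of the q register.

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hedged sketch of the one_lane test using NEON intrinsics. */
    void one_lane_intrin(uint8_t *out, uint8_t v) {
        uint8x16_t z = vdupq_n_u8(0);   /* dup.16b vN, wzr */
        z = vsetq_lane_u8(v, z, 0);     /* ins.b  vN[0], w1 */
        vst1q_u8(out, z);               /* str qN, [x0] */
    }
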
> > +; Check that building a vector from floats doesn't insert an unnecessary
> > +; copy for lane zero.
> > +define <4 x float>  @foo(float %a, float %b, float %c, float %d)
> nounwind {
> > +; CHECK-LABEL: foo:
> > +; CHECK-NOT: ins.s v0[0], v0[0]
> > +; CHECK: ins.s v0[1], v1[0]
> > +; CHECK: ins.s v0[2], v2[0]
> > +; CHECK: ins.s v0[3], v3[0]
> > +; CHECK: ret
> > +  %1 = insertelement <4 x float> undef, float %a, i32 0
> > +  %2 = insertelement <4 x float> %1, float %b, i32 1
> > +  %3 = insertelement <4 x float> %2, float %c, i32 2
> > +  %4 = insertelement <4 x float> %3, float %d, i32 3
> > +  ret <4 x float> %4
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/call-tailcalls.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/call-tailcalls.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/call-tailcalls.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/call-tailcalls.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,91 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
> > +
> > + at t = weak global i32 ()* null
> > + at x = external global i32, align 4
> > +
> > +define void @t2() {
> > +; CHECK-LABEL: t2:
> > +; CHECK: adrp  x[[GOTADDR:[0-9]+]], _t at GOTPAGE
> > +; CHECK: ldr   x[[ADDR:[0-9]+]], [x[[GOTADDR]], _t at GOTPAGEOFF]
> > +; CHECK: ldr   x[[DEST:[0-9]+]], [x[[ADDR]]]
> > +; CHECK: br    x[[DEST]]
> > +  %tmp = load i32 ()** @t
> > +  %tmp.upgrd.2 = tail call i32 %tmp()
> > +  ret void
> > +}
> > +
> > +define void @t3() {
> > +; CHECK-LABEL: t3:
> > +; CHECK: b     _t2
> > +  tail call void @t2()
> > +  ret void
> > +}
> > +
> > +define double @t4(double %a) nounwind readonly ssp {
> > +; CHECK-LABEL: t4:
> > +; CHECK: b     _sin
> > +  %tmp = tail call double @sin(double %a) nounwind readonly
> > +  ret double %tmp
> > +}
> > +
> > +define float @t5(float %a) nounwind readonly ssp {
> > +; CHECK-LABEL: t5:
> > +; CHECK: b     _sinf
> > +  %tmp = tail call float @sinf(float %a) nounwind readonly
> > +  ret float %tmp
> > +}
> > +
> > +define void @t7() nounwind {
> > +; CHECK-LABEL: t7:
> > +; CHECK: b     _foo
> > +; CHECK: b     _bar
> > +
> > +  br i1 undef, label %bb, label %bb1.lr.ph
> > +
> > +bb1.lr.ph:                                        ; preds = %entry
> > +  tail call void @bar() nounwind
> > +  ret void
> > +
> > +bb:                                               ; preds = %entry
> > +  tail call void @foo() nounwind
> > +  ret void
> > +}
> > +
> > +define i32 @t8(i32 %x) nounwind ssp {
> > +; CHECK-LABEL: t8:
> > +; CHECK: b     _a
> > +; CHECK: b     _b
> > +; CHECK: b     _c
> > +  %and = and i32 %x, 1
> > +  %tobool = icmp eq i32 %and, 0
> > +  br i1 %tobool, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call = tail call i32 @a(i32 %x) nounwind
> > +  br label %return
> > +
> > +if.end:                                           ; preds = %entry
> > +  %and1 = and i32 %x, 2
> > +  %tobool2 = icmp eq i32 %and1, 0
> > +  br i1 %tobool2, label %if.end5, label %if.then3
> > +
> > +if.then3:                                         ; preds = %if.end
> > +  %call4 = tail call i32 @b(i32 %x) nounwind
> > +  br label %return
> > +
> > +if.end5:                                          ; preds = %if.end
> > +  %call6 = tail call i32 @c(i32 %x) nounwind
> > +  br label %return
> > +
> > +return:                                           ; preds = %if.end5,
> %if.then3, %if.then
> > +  %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [
> %call6, %if.end5 ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare float @sinf(float) nounwind readonly
> > +declare double @sin(double) nounwind readonly
> > +declare void @bar() nounwind
> > +declare void @foo() nounwind
> > +declare i32 @a(i32)
> > +declare i32 @b(i32)
> > +declare i32 @c(i32)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/cast-opt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/cast-opt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/cast-opt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/cast-opt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,31 @@
> > +; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s |
> FileCheck %s
> > +; <rdar://problem/15992732>
> > +; Zero truncation is not necessary when the values are extended properly
> > +; already.
> > +
> > + at block = common global i8* null, align 8
> > +
> > +define zeroext i8 @foo(i32 %i1, i32 %i2) {
> > +; CHECK-LABEL: foo:
> > +; CHECK: csinc
> > +; CHECK-NOT: and
> > +entry:
> > +  %idxprom = sext i32 %i1 to i64
> > +  %0 = load i8** @block, align 8
> > +  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
> > +  %1 = load i8* %arrayidx, align 1
> > +  %idxprom1 = sext i32 %i2 to i64
> > +  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
> > +  %2 = load i8* %arrayidx2, align 1
> > +  %cmp = icmp eq i8 %1, %2
> > +  br i1 %cmp, label %return, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %cmp7 = icmp ugt i8 %1, %2
> > +  %conv9 = zext i1 %cmp7 to i8
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i8 [ %conv9, %if.then ], [ 1, %entry ]
> > +  ret i8 %retval.0
> > +}
> >
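A hedged reconstruction of source that yields this IR (my sketch, not the original radar test case): both i1-producing paths are already zero-extended via csinc, so no extra 'and' with #1 should be needed for the zeroext return.

    extern unsigned char *block;

    /* Sketch: returns 1 on equality, otherwise the result of the > compare. */
    unsigned char foo(int i1, int i2) {
        if (block[i1] == block[i2])
            return 1;
        return (unsigned char)(block[i1] > block[i2]);
    }
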
> > Added: llvm/trunk/test/CodeGen/ARM64/ccmp-heuristics.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ccmp-heuristics.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ccmp-heuristics.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ccmp-heuristics.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,190 @@
> > +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp |
> FileCheck %s
> > +target triple = "arm64-apple-ios7.0.0"
> > +
> > + at channelColumns = external global i64
> > + at channelTracks = external global i64
> > + at mazeRoute = external hidden unnamed_addr global i8*, align 8
> > + at TOP = external global i64*
> > + at BOT = external global i64*
> > + at netsAssign = external global i64*
> > +
> > +; Function from yacr2/maze.c
> > +; The branch at the end of %if.then is driven by %cmp5 and %cmp6.
> > +; Isel converts the and i1 into two branches, and arm64-ccmp should not
> convert
> > +; it back again. %cmp6 has much higher latency than %cmp5.
> > +; CHECK: Maze1
> > +; CHECK: %if.then
> > +; CHECK: cmp x{{[0-9]+}}, #2
> > +; CHECK-NEXT: b.cc
> > +; CHECK: %if.then
> > +; CHECK: cmp x{{[0-9]+}}, #2
> > +; CHECK-NEXT: b.cc
> > +define i32 @Maze1() nounwind ssp {
> > +entry:
> > +  %0 = load i64* @channelColumns, align 8, !tbaa !0
> > +  %cmp90 = icmp eq i64 %0, 0
> > +  br i1 %cmp90, label %for.end, label %for.body
> > +
> > +for.body:                                         ; preds = %for.inc,
> %entry
> > +  %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ]
> > +  %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ]
> > +  %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
> > +  %2 = load i8** @mazeRoute, align 8, !tbaa !3
> > +  %arrayidx = getelementptr inbounds i8* %2, i64 %i.092
> > +  %3 = load i8* %arrayidx, align 1, !tbaa !1
> > +  %tobool = icmp eq i8 %3, 0
> > +  br i1 %tobool, label %for.inc, label %if.then
> > +
> > +if.then:                                          ; preds = %for.body
> > +  %4 = load i64** @TOP, align 8, !tbaa !3
> > +  %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092
> > +  %5 = load i64* %arrayidx1, align 8, !tbaa !0
> > +  %6 = load i64** @netsAssign, align 8, !tbaa !3
> > +  %arrayidx2 = getelementptr inbounds i64* %6, i64 %5
> > +  %7 = load i64* %arrayidx2, align 8, !tbaa !0
> > +  %8 = load i64** @BOT, align 8, !tbaa !3
> > +  %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092
> > +  %9 = load i64* %arrayidx3, align 8, !tbaa !0
> > +  %arrayidx4 = getelementptr inbounds i64* %6, i64 %9
> > +  %10 = load i64* %arrayidx4, align 8, !tbaa !0
> > +  %cmp5 = icmp ugt i64 %i.092, 1
> > +  %cmp6 = icmp ugt i64 %10, 1
> > +  %or.cond = and i1 %cmp5, %cmp6
> > +  br i1 %or.cond, label %land.lhs.true7, label %if.else
> > +
> > +land.lhs.true7:                                   ; preds = %if.then
> > +  %11 = load i64* @channelTracks, align 8, !tbaa !0
> > +  %add = add i64 %11, 1
> > +  %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64
> %10, i64 0, i64 %7, i32 -1, i32 -1)
> > +  %tobool8 = icmp eq i32 %call, 0
> > +  br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label
> %if.then9
> > +
> > +land.lhs.true7.if.else_crit_edge:                 ; preds =
> %land.lhs.true7
> > +  %.pre = load i64* @channelColumns, align 8, !tbaa !0
> > +  br label %if.else
> > +
> > +if.then9:                                         ; preds =
> %land.lhs.true7
> > +  %12 = load i8** @mazeRoute, align 8, !tbaa !3
> > +  %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092
> > +  store i8 0, i8* %arrayidx10, align 1, !tbaa !1
> > +  %13 = load i64** @TOP, align 8, !tbaa !3
> > +  %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092
> > +  %14 = load i64* %arrayidx11, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %14)
> > +  %15 = load i64** @BOT, align 8, !tbaa !3
> > +  %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092
> > +  %16 = load i64* %arrayidx12, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %16)
> > +  br label %for.inc
> > +
> > +if.else:                                          ; preds =
> %land.lhs.true7.if.else_crit_edge, %if.then
> > +  %17 = phi i64 [ %.pre, %land.lhs.true7.if.else_crit_edge ], [ %1,
> %if.then ]
> > +  %cmp13 = icmp ult i64 %i.092, %17
> > +  %or.cond89 = and i1 %cmp13, %cmp6
> > +  br i1 %or.cond89, label %land.lhs.true16, label %if.else24
> > +
> > +land.lhs.true16:                                  ; preds = %if.else
> > +  %18 = load i64* @channelTracks, align 8, !tbaa !0
> > +  %add17 = add i64 %18, 1
> > +  %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64
> %10, i64 0, i64 %7, i32 1, i32 -1)
> > +  %tobool19 = icmp eq i32 %call18, 0
> > +  br i1 %tobool19, label %if.else24, label %if.then20
> > +
> > +if.then20:                                        ; preds =
> %land.lhs.true16
> > +  %19 = load i8** @mazeRoute, align 8, !tbaa !3
> > +  %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092
> > +  store i8 0, i8* %arrayidx21, align 1, !tbaa !1
> > +  %20 = load i64** @TOP, align 8, !tbaa !3
> > +  %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092
> > +  %21 = load i64* %arrayidx22, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %21)
> > +  %22 = load i64** @BOT, align 8, !tbaa !3
> > +  %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092
> > +  %23 = load i64* %arrayidx23, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %23)
> > +  br label %for.inc
> > +
> > +if.else24:                                        ; preds =
> %land.lhs.true16, %if.else
> > +  br i1 %cmp5, label %land.lhs.true26, label %if.else36
> > +
> > +land.lhs.true26:                                  ; preds = %if.else24
> > +  %24 = load i64* @channelTracks, align 8, !tbaa !0
> > +  %cmp27 = icmp ult i64 %7, %24
> > +  br i1 %cmp27, label %land.lhs.true28, label %if.else36
> > +
> > +land.lhs.true28:                                  ; preds =
> %land.lhs.true26
> > +  %add29 = add i64 %24, 1
> > +  %call30 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7,
> i64 %add29, i64 %10, i32 -1, i32 1)
> > +  %tobool31 = icmp eq i32 %call30, 0
> > +  br i1 %tobool31, label %if.else36, label %if.then32
> > +
> > +if.then32:                                        ; preds =
> %land.lhs.true28
> > +  %25 = load i8** @mazeRoute, align 8, !tbaa !3
> > +  %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092
> > +  store i8 0, i8* %arrayidx33, align 1, !tbaa !1
> > +  %26 = load i64** @TOP, align 8, !tbaa !3
> > +  %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092
> > +  %27 = load i64* %arrayidx34, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %27)
> > +  %28 = load i64** @BOT, align 8, !tbaa !3
> > +  %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092
> > +  %29 = load i64* %arrayidx35, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %29)
> > +  br label %for.inc
> > +
> > +if.else36:                                        ; preds =
> %land.lhs.true28, %land.lhs.true26, %if.else24
> > +  %30 = load i64* @channelColumns, align 8, !tbaa !0
> > +  %cmp37 = icmp ult i64 %i.092, %30
> > +  br i1 %cmp37, label %land.lhs.true38, label %if.else48
> > +
> > +land.lhs.true38:                                  ; preds = %if.else36
> > +  %31 = load i64* @channelTracks, align 8, !tbaa !0
> > +  %cmp39 = icmp ult i64 %7, %31
> > +  br i1 %cmp39, label %land.lhs.true40, label %if.else48
> > +
> > +land.lhs.true40:                                  ; preds =
> %land.lhs.true38
> > +  %add41 = add i64 %31, 1
> > +  %call42 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7,
> i64 %add41, i64 %10, i32 1, i32 1)
> > +  %tobool43 = icmp eq i32 %call42, 0
> > +  br i1 %tobool43, label %if.else48, label %if.then44
> > +
> > +if.then44:                                        ; preds =
> %land.lhs.true40
> > +  %32 = load i8** @mazeRoute, align 8, !tbaa !3
> > +  %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092
> > +  store i8 0, i8* %arrayidx45, align 1, !tbaa !1
> > +  %33 = load i64** @TOP, align 8, !tbaa !3
> > +  %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092
> > +  %34 = load i64* %arrayidx46, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %34)
> > +  %35 = load i64** @BOT, align 8, !tbaa !3
> > +  %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092
> > +  %36 = load i64* %arrayidx47, align 8, !tbaa !0
> > +  tail call fastcc void @CleanNet(i64 %36)
> > +  br label %for.inc
> > +
> > +if.else48:                                        ; preds =
> %land.lhs.true40, %land.lhs.true38, %if.else36
> > +  %inc = add nsw i32 %numLeft.091, 1
> > +  br label %for.inc
> > +
> > +for.inc:                                          ; preds = %if.else48,
> %if.then44, %if.then32, %if.then20, %if.then9, %for.body
> > +  %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091,
> %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [
> %inc, %if.else48 ], [ %numLeft.091, %for.body ]
> > +  %inc53 = add i64 %i.092, 1
> > +  %37 = load i64* @channelColumns, align 8, !tbaa !0
> > +  %cmp = icmp ugt i64 %inc53, %37
> > +  br i1 %cmp, label %for.end, label %for.body
> > +
> > +for.end:                                          ; preds = %for.inc,
> %entry
> > +  %numLeft.0.lcssa = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
> > +  ret i32 %numLeft.0.lcssa
> > +}
> > +
> > +; Materializable
> > +declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32)
> nounwind ssp
> > +
> > +; Materializable
> > +declare hidden fastcc void @CleanNet(i64) nounwind ssp
> > +
> > +!0 = metadata !{metadata !"long", metadata !1}
> > +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> > +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> > +!3 = metadata !{metadata !"any pointer", metadata !1}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/ccmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ccmp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ccmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ccmp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,289 @@
> > +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp
> -arm64-stress-ccmp | FileCheck %s
> > +target triple = "arm64-apple-ios"
> > +
> > +; CHECK: single_same
> > +; CHECK: cmp w0, #5
> > +; CHECK-NEXT: ccmp w1, #17, #4, ne
> > +; CHECK-NEXT: b.ne
> > +; CHECK: %if.then
> > +; CHECK: bl _foo
> > +; CHECK: %if.end
> > +define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 5
> > +  %cmp1 = icmp eq i32 %b, 17
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
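For context, a C sketch of the shape being tested (my example, not from the patch): a short-circuited || whose two compares the arm64-ccmp pass should fuse into cmp + ccmp + one branch instead of two separate compare-and-branch sequences.

    int foo(void);

    /* Sketch: both conditions feed a single branch after conversion. */
    int single_same(int a, int b) {
        if (a == 5 || b == 17)
            foo();
        return 7;
    }
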
> > +; Different condition codes for the two compares.
> > +; CHECK: single_different
> > +; CHECK: cmp w0, #6
> > +; CHECK-NEXT: ccmp w1, #17, #0, ge
> > +; CHECK-NEXT: b.eq
> > +; CHECK: %if.then
> > +; CHECK: bl _foo
> > +; CHECK: %if.end
> > +define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp sle i32 %a, 5
> > +  %cmp1 = icmp ne i32 %b, 17
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
> > +; Second block clobbers the flags, can't convert (easily).
> > +; CHECK: single_flagclobber
> > +; CHECK: cmp
> > +; CHECK: b.eq
> > +; CHECK: cmp
> > +; CHECK: b.gt
> > +define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 5
> > +  br i1 %cmp, label %if.then, label %lor.lhs.false
> > +
> > +lor.lhs.false:                                    ; preds = %entry
> > +  %cmp1 = icmp slt i32 %b, 7
> > +  %mul = shl nsw i32 %b, 1
> > +  %add = add nsw i32 %b, 1
> > +  %cond = select i1 %cmp1, i32 %mul, i32 %add
> > +  %cmp2 = icmp slt i32 %cond, 17
> > +  br i1 %cmp2, label %if.then, label %if.end
> > +
> > +if.then:                                          ; preds =
> %lor.lhs.false, %entry
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %if.then,
> %lor.lhs.false
> > +  ret i32 7
> > +}
> > +
> > +; Second block clobbers the flags and ends with a tbz terminator.
> > +; CHECK: single_flagclobber_tbz
> > +; CHECK: cmp
> > +; CHECK: b.eq
> > +; CHECK: cmp
> > +; CHECK: tbz
> > +define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 5
> > +  br i1 %cmp, label %if.then, label %lor.lhs.false
> > +
> > +lor.lhs.false:                                    ; preds = %entry
> > +  %cmp1 = icmp slt i32 %b, 7
> > +  %mul = shl nsw i32 %b, 1
> > +  %add = add nsw i32 %b, 1
> > +  %cond = select i1 %cmp1, i32 %mul, i32 %add
> > +  %and = and i32 %cond, 8
> > +  %cmp2 = icmp ne i32 %and, 0
> > +  br i1 %cmp2, label %if.then, label %if.end
> > +
> > +if.then:                                          ; preds =
> %lor.lhs.false, %entry
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %if.then,
> %lor.lhs.false
> > +  ret i32 7
> > +}
> > +
> > +; Speculatively execute a division whose divisor may be zero.
> > +; The sdiv/udiv instructions do not trap when the divisor is zero, so
> they are
> > +; safe to speculate.
> > +; CHECK: speculate_division
> > +; CHECK-NOT: cmp
> > +; CHECK: sdiv
> > +; CHECK: cmp
> > +; CHECK-NEXT: ccmp
> > +define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp sgt i32 %a, 0
> > +  br i1 %cmp, label %land.lhs.true, label %if.end
> > +
> > +land.lhs.true:
> > +  %div = sdiv i32 %b, %a
> > +  %cmp1 = icmp slt i32 %div, 17
> > +  br i1 %cmp1, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
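A C sketch of the same control flow (mine, not from the patch): the source only evaluates b / a when a > 0, but because AArch64 sdiv yields 0 rather than trapping on a zero divisor, the backend may hoist the division and fuse both conditions with ccmp.

    int foo(void);

    /* Sketch: the division is safe to speculate above the a > 0 guard. */
    int speculate_division(int a, int b) {
        if (a > 0 && b / a < 17)
            foo();
        return 7;
    }
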
> > +; Floating point compare.
> > +; CHECK: single_fcmp
> > +; CHECK: cmp
> > +; CHECK-NOT: b.
> > +; CHECK: fccmp {{.*}}, #8, ge
> > +; CHECK: b.lt
> > +define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp sgt i32 %a, 0
> > +  br i1 %cmp, label %land.lhs.true, label %if.end
> > +
> > +land.lhs.true:
> > +  %conv = sitofp i32 %a to float
> > +  %div = fdiv float %b, %conv
> > +  %cmp1 = fcmp oge float %div, 1.700000e+01
> > +  br i1 %cmp1, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
> > +; Chain multiple compares.
> > +; CHECK: multi_different
> > +; CHECK: cmp
> > +; CHECK: ccmp
> > +; CHECK: ccmp
> > +; CHECK: b.
> > +define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp {
> > +entry:
> > +  %cmp = icmp sgt i32 %a, %b
> > +  br i1 %cmp, label %land.lhs.true, label %if.end
> > +
> > +land.lhs.true:
> > +  %div = sdiv i32 %b, %a
> > +  %cmp1 = icmp eq i32 %div, 5
> > +  %cmp4 = icmp sgt i32 %div, %c
> > +  %or.cond = and i1 %cmp1, %cmp4
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret void
> > +}
> > +
> > +; Convert a cbz in the head block.
> > +; CHECK: cbz_head
> > +; CHECK: cmp w0, #0
> > +; CHECK: ccmp
> > +define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 0
> > +  %cmp1 = icmp ne i32 %b, 17
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
> > +; Check that the immediate operand is in range. The ccmp instruction
> encodes a
> > +; smaller range of immediates than subs/adds.
> > +; The ccmp immediates must be in the range 0-31.
> > +; CHECK: immediate_range
> > +; CHECK-NOT: ccmp
> > +define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 5
> > +  %cmp1 = icmp eq i32 %b, 32
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
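A hedged illustration of the immediate limit (function names are mine): ccmp carries a 5-bit immediate, so comparing against 32, as the test does, cannot be fused, while 31 still could be.

    int foo(void);

    /* Sketch: 31 fits in a ccmp immediate, 32 does not. */
    int immediate_in_range(int a, int b) {
        if (a == 5 || b == 31)
            foo();
        return 7;
    }

    int immediate_out_of_range(int a, int b) {
        if (a == 5 || b == 32)   /* no ccmp expected here */
            foo();
        return 7;
    }
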
> > +; Convert a cbz in the second block.
> > +; CHECK: cbz_second
> > +; CHECK: cmp w0, #0
> > +; CHECK: ccmp w1, #0, #0, ne
> > +; CHECK: b.eq
> > +define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 0
> > +  %cmp1 = icmp ne i32 %b, 0
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +
> > +; Convert a cbnz in the second block.
> > +; CHECK: cbnz_second
> > +; CHECK: cmp w0, #0
> > +; CHECK: ccmp w1, #0, #4, ne
> > +; CHECK: b.ne
> > +define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +  %cmp = icmp eq i32 %a, 0
> > +  %cmp1 = icmp eq i32 %b, 0
> > +  %or.cond = or i1 %cmp, %cmp1
> > +  br i1 %or.cond, label %if.then, label %if.end
> > +
> > +if.then:
> > +  %call = tail call i32 @foo() nounwind
> > +  br label %if.end
> > +
> > +if.end:
> > +  ret i32 7
> > +}
> > +declare i32 @foo()
> > +
> > +%str1 = type { %str2 }
> > +%str2 = type { [24 x i8], i8*, i32, %str1*, i32, [4 x i8], %str1*,
> %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, i8*, i8,
> i8*, %str1*, i8* }
> > +
> > +; Test case distilled from 126.gcc.
> > +; The phi in sw.bb.i.i gets multiple operands for the %entry
> predecessor.
> > +; CHECK: build_modify_expr
> > +define void @build_modify_expr() nounwind ssp {
> > +entry:
> > +  switch i32 undef, label %sw.bb.i.i [
> > +    i32 69, label %if.end85
> > +    i32 70, label %if.end85
> > +    i32 71, label %if.end85
> > +    i32 72, label %if.end85
> > +    i32 73, label %if.end85
> > +    i32 105, label %if.end85
> > +    i32 106, label %if.end85
> > +  ]
> > +
> > +if.end85:
> > +  ret void
> > +
> > +sw.bb.i.i:
> > +  %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ]
> > +  %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32
> 0, i32 2
> > +  %arrayidx.i.i = bitcast i32* %operands.i.i to %str1**
> > +  %0 = load %str1** %arrayidx.i.i, align 8
> > +  %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64
> 0, i32 0, i32 0, i64 16
> > +  br label %sw.bb.i.i
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/coalesce-ext.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/coalesce-ext.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/coalesce-ext.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/coalesce-ext.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s
> > +; Check that the peephole optimizer knows about sext and zext
> instructions.
> > +; CHECK: test1sext
> > +define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
> > +  %C = add i64 %A, %B
> > +  ; CHECK: add x[[SUM:[0-9]+]], x0, x1
> > +  %D = trunc i64 %C to i32
> > +  %E = shl i64 %C, 32
> > +  %F = ashr i64 %E, 32
> > +  ; CHECK: sxtw x[[EXT:[0-9]+]], x[[SUM]]
> > +  store volatile i64 %F, i64 *%P2
> > +  ; CHECK: str x[[EXT]]
> > +  store volatile i32 %D, i32* %P
> > +  ; Reuse low bits of extended register, don't extend live range of SUM.
> > +  ; CHECK: str w[[SUM]]
> > +  ret i32 %D
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/code-model-large-abs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/code-model-large-abs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/code-model-large-abs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/code-model-large-abs.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,72 @@
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large < %s |
> FileCheck %s
> > +
> > + at var8 = global i8 0
> > + at var16 = global i16 0
> > + at var32 = global i32 0
> > + at var64 = global i64 0
> > +
> > +define i8* @global_addr() {
> > +; CHECK-LABEL: global_addr:
> > +  ret i8* @var8
> > +  ; The movz/movk calculation should end up returned directly in x0.
> > +; CHECK: movz x0, #:abs_g3:var8
> > +; CHECK: movk x0, #:abs_g2_nc:var8
> > +; CHECK: movk x0, #:abs_g1_nc:var8
> > +; CHECK: movk x0, #:abs_g0_nc:var8
> > +; CHECK-NEXT: ret
> > +}
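
(Aside, not part of the patch: the movz/movk sequence with :abs_g3: down to
:abs_g0_nc: above is just materializing the 64-bit absolute address 16 bits
at a time -- g3 is bits 63:48, g0 is bits 15:0, and only the first chunk uses
movz while the rest are keep-other-bits movk's. A standalone C++ sketch of
that arithmetic, with a made-up address value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint64_t addr = 0x0123456789abcdefULL;      // hypothetical &var8
      std::uint16_t g3 = (std::uint16_t)(addr >> 48);
      std::uint16_t g2 = (std::uint16_t)(addr >> 32);
      std::uint16_t g1 = (std::uint16_t)(addr >> 16);
      std::uint16_t g0 = (std::uint16_t)addr;

      std::uint64_t x0 = (std::uint64_t)g3 << 48;      // movz x0, #:abs_g3:var8
      x0 |= (std::uint64_t)g2 << 32;                   // movk x0, #:abs_g2_nc:var8
      x0 |= (std::uint64_t)g1 << 16;                   // movk x0, #:abs_g1_nc:var8
      x0 |= (std::uint64_t)g0;                         // movk x0, #:abs_g0_nc:var8

      std::printf("%d\n", x0 == addr ? 1 : 0);         // prints 1
      return 0;
    }
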
> > +
> > +define i8 @global_i8() {
> > +; CHECK-LABEL: global_i8:
> > +  %val = load i8* @var8
> > +  ret i8 %val
> > +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var8
> > +; CHECK: ldrb w0, [x[[ADDR_REG]]]
> > +}
> > +
> > +define i16 @global_i16() {
> > +; CHECK-LABEL: global_i16:
> > +  %val = load i16* @var16
> > +  ret i16 %val
> > +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var16
> > +; CHECK: ldrh w0, [x[[ADDR_REG]]]
> > +}
> > +
> > +define i32 @global_i32() {
> > +; CHECK-LABEL: global_i32:
> > +  %val = load i32* @var32
> > +  ret i32 %val
> > +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var32
> > +; CHECK: ldr w0, [x[[ADDR_REG]]]
> > +}
> > +
> > +define i64 @global_i64() {
> > +; CHECK-LABEL: global_i64:
> > +  %val = load i64* @var64
> > +  ret i64 %val
> > +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var64
> > +; CHECK: ldr x0, [x[[ADDR_REG]]]
> > +}
> > +
> > +define <2 x i64> @constpool() {
> > +; CHECK-LABEL: constpool:
> > +  ret <2 x i64> <i64 123456789, i64 987654321100>
> > +
> > +; CHECK: movz x[[ADDR_REG:[0-9]+]],
> #:abs_g3:[[CPADDR:.LCPI[0-9]+_[0-9]+]]
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:[[CPADDR]]
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:[[CPADDR]]
> > +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:[[CPADDR]]
> > +; CHECK: ldr q0, [x[[ADDR_REG]]]
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/collect-loh-garbage-crash.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/collect-loh-garbage-crash.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/collect-loh-garbage-crash.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/collect-loh-garbage-crash.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,37 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh
> -arm64-collect-loh-bb-only=true
> -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
> > +; Check that the LOH analysis does not crash when the analysed chain
> > +; contains instructions that are filtered out.
> > +;
> > +; Before the fix for <rdar://problem/16041712>, these cases were removed
> > +; from the main container. Now, the deterministic container does not allow
> > +; arbitrary values to be removed, so we have to live with garbage values.
> > +; <rdar://problem/16041712>
> > +
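
To illustrate the comment above (this is only a sketch of the idea, not the
pass's real data structure): with a deterministic, insertion-ordered container
you cannot cheaply erase arbitrary entries, so filtered-out instructions are
left in place as "garbage" and skipped by whoever iterates:

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    struct OrderedUses {
      // (instruction name, still relevant?) kept in insertion order.
      std::vector<std::pair<std::string, bool>> Entries;

      void insert(const std::string &I) { Entries.push_back({I, true}); }

      // No erase(): marking entries dead keeps iteration deterministic,
      // at the cost of leaving garbage behind.
      void invalidate(const std::string &I) {
        for (auto &E : Entries)
          if (E.first == I)
            E.second = false;
      }
    };

    int main() {
      OrderedUses Uses;
      Uses.insert("adrp");
      Uses.insert("filtered-out instruction");
      Uses.insert("ldr");
      Uses.invalidate("filtered-out instruction"); // used to be erased outright
      for (const auto &E : Uses.Entries)
        if (E.second)
          std::printf("%s\n", E.first.c_str());    // consumers skip garbage entries
      return 0;
    }
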
> > +%"class.H4ISP::H4ISPDevice" = type { i32 (%"class.H4ISP::H4ISPDevice"*,
> i32, i8*, i8*)*, i8*, i32*, %"class.H4ISP::H4ISPCameraManager"* }
> > +
> > +%"class.H4ISP::H4ISPCameraManager" = type opaque
> > +
> > +declare i32
> @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*)
> > +
> > +@pH4ISPDevice = hidden global %"class.H4ISP::H4ISPDevice"* null, align 8
> > +
> > +; CHECK-LABEL: _foo:
> > +; CHECK: ret
> > +; CHECK-NOT: .loh AdrpLdrGotLdr
> > +define void @foo() {
> > +entry:
> > +  br label %if.then83
> > +if.then83:                                        ; preds = %if.end81
> > +  %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
> > +  %call84 = call i32
> @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*
> %tmp) #19
> > +  tail call void asm sideeffect "",
> "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
> > +  %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
> > +  tail call void asm sideeffect "",
> "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
> > +  %pCameraManager.i268 = getelementptr inbounds
> %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
> > +  %tmp3 = load %"class.H4ISP::H4ISPCameraManager"**
> %pCameraManager.i268, align 8
> > +  %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
> > +  br i1 %tobool.i269, label %if.then83, label %end
> > +end:
> > +  ret void
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/collect-loh-str.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/collect-loh-str.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/collect-loh-str.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/collect-loh-str.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,23 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh
> -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
> > +; Test case for <rdar://problem/15942912>.
> > +; AdrpAddStr cannot be used when the store uses the same
> > +; register as address and value. Indeed, the related optimization,
> > +; if applied, may completely remove the definition or
> > +; at least provide a wrong one (with the offset folded
> > +; into the definition).
> > +
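
For readers not familiar with the AdrpAddStr LOH, here is a tiny sketch of the
constraint being tested (made-up helper, not linker or backend code): folding
the add's offset into the str is only legal when the stored value lives in a
different register than the address, otherwise dropping the add changes the
value that gets stored:

    #include <cstdio>

    struct StoreInfo {
      int ValueReg; // register holding the value being stored
      int AddrReg;  // register holding the address
    };

    // Folding the add's offset into the str (and dropping the add) is only
    // safe if the add's destination is not also the store's value operand.
    bool canFoldAddIntoStr(const StoreInfo &S, int AddDestReg) {
      return S.ValueReg != AddDestReg;
    }

    int main() {
      StoreInfo SameReg = {8, 8}; // str x8, [x8] -- value and address share x8
      StoreInfo DiffReg = {9, 8}; // str x9, [x8]
      std::printf("%d %d\n", canFoldAddIntoStr(SameReg, 8) ? 1 : 0,  // 0: unsafe
                             canFoldAddIntoStr(DiffReg, 8) ? 1 : 0); // 1: safe
      return 0;
    }
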
> > +%struct.anon = type { i32*, i32** }
> > +
> > +@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8
> > +
> > +; CHECK-LABEL: _pptp_wan_init
> > +; CHECK: ret
> > +; CHECK-NOT: AdrpAddStr
> > +define i32 @pptp_wan_init() {
> > +entry:
> > +  store i32* null, i32** getelementptr inbounds (%struct.anon*
> @pptp_wan_head, i64 0, i32 0), align 8
> > +  store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64
> 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64
> 0, i32 1), align 8
> > +  ret i32 0
> > +}
> > +
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/collect-loh.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/collect-loh.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/collect-loh.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/collect-loh.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,47 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh
> -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
> > +
> > +@a = internal unnamed_addr global i32 0, align 4
> > +@b = external global i32
> > +
> > +; Function Attrs: noinline nounwind ssp
> > +define void @foo(i32 %t) {
> > +entry:
> > +  %tmp = load i32* @a, align 4
> > +  %add = add nsw i32 %tmp, %t
> > +  store i32 %add, i32* @a, align 4
> > +  ret void
> > +}
> > +
> > +; Function Attrs: nounwind ssp
> > +; Testcase for <rdar://problem/15438605>, AdrpAdrp reuse is valid only
> when the first adrp
> > +; dominates the second.
> > +; The first adrp comes from the loading of 'a' and the second the
> loading of 'b'.
> > +; 'a' is loaded in if.then, 'b' in if.end4; if.then does not dominate if.end4.
> > +; CHECK-LABEL: _test
> > +; CHECK: ret
> > +; CHECK-NOT: .loh AdrpAdrp
> > +define i32 @test(i32 %t) {
> > +entry:
> > +  %cmp = icmp sgt i32 %t, 5
> > +  br i1 %cmp, label %if.then, label %if.end4
> > +
> > +if.then:                                          ; preds = %entry
> > +  %tmp = load i32* @a, align 4
> > +  %add = add nsw i32 %tmp, %t
> > +  %cmp1 = icmp sgt i32 %add, 12
> > +  br i1 %cmp1, label %if.then2, label %if.end4
> > +
> > +if.then2:                                         ; preds = %if.then
> > +  tail call void @foo(i32 %add)
> > +  %tmp1 = load i32* @a, align 4
> > +  br label %if.end4
> > +
> > +if.end4:                                          ; preds = %if.then2,
> %if.then, %entry
> > +  %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t,
> %entry ]
> > +  %tmp2 = load i32* @b, align 4
> > +  %add5 = add nsw i32 %tmp2, %t.addr.0
> > +  tail call void @foo(i32 %add5)
> > +  %tmp3 = load i32* @b, align 4
> > +  %add6 = add nsw i32 %tmp3, %t.addr.0
> > +  ret i32 %add6
> > +}
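
The dominance condition described in the comment block above can be sketched
like this (simplified, not the pass's code; dominators are hard-coded as
immediate-dominator parents for the CFG of @test):

    #include <cstdio>
    #include <map>
    #include <string>

    using IDomMap = std::map<std::string, std::string>;

    // A dominates B if walking B's immediate-dominator chain reaches A.
    bool dominates(const IDomMap &IDom, const std::string &A, std::string B) {
      while (true) {
        if (B == A)
          return true;
        auto It = IDom.find(B);
        if (It == IDom.end())
          return false;
        B = It->second;
      }
    }

    int main() {
      // Immediate dominators for @test above: everything hangs off entry,
      // and if.then2 is only reached through if.then.
      IDomMap IDom = {{"if.then", "entry"},
                      {"if.then2", "if.then"},
                      {"if.end4", "entry"}};
      bool Emit = dominates(IDom, "if.then", "if.end4"); // first adrp is in if.then
      std::printf("emit AdrpAdrp: %d\n", Emit ? 1 : 0);  // 0 -- must not emit the LOH
      return 0;
    }
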
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s
> > +
> > +        .text
> > +        .globl _foo
> > +        .cfi_startproc
> > +_foo:
> > +        stp x29, x30, [sp, #-16]!
> > + .cfi_adjust_cfa_offset 16
> > +
> > +        ldp x29, x30, [sp], #16
> > + .cfi_adjust_cfa_offset -16
> > +        .cfi_restore x29
> > +        .cfi_restore x30
> > +
> > +        ret
> > +
> > +        .cfi_endproc
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/complex-ret.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/complex-ret.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/complex-ret.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/complex-ret.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,7 @@
> > +; RUN: llc -march=arm64 -o - %s | FileCheck %s
> > +
> > +define { i192, i192, i21, i192 } @foo(i192) {
> > +; CHECK-LABEL: foo:
> > +; CHECK: stp xzr, xzr, [x8]
> > +  ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3}
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/convert-v2f64-v2i32.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/convert-v2f64-v2i32.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/convert-v2f64-v2i32.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/convert-v2f64-v2i32.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,24 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +; CHECK: fptosi_1
> > +; CHECK: fcvtzs.2d
> > +; CHECK: xtn.2s
> > +; CHECK: ret
> > +define void @fptosi_1() nounwind noinline ssp {
> > +entry:
> > +  %0 = fptosi <2 x double> undef to <2 x i32>
> > +  store <2 x i32> %0, <2 x i32>* undef, align 8
> > +  ret void
> > +}
> > +
> > +; CHECK: fptoui_1
> > +; CHECK: fcvtzu.2d
> > +; CHECK: xtn.2s
> > +; CHECK: ret
> > +define void @fptoui_1() nounwind noinline ssp {
> > +entry:
> > +  %0 = fptoui <2 x double> undef to <2 x i32>
> > +  store <2 x i32> %0, <2 x i32>* undef, align 8
> > +  ret void
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/convert-v2i32-v2f64.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/convert-v2i32-v2f64.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/convert-v2i32-v2f64.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/convert-v2i32-v2f64.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,29 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
> > +; CHECK-LABEL: f1:
> > +; CHECK: sshll.2d v0, v0, #0
> > +; CHECK-NEXT: scvtf.2d v0, v0
> > +; CHECK-NEXT: ret
> > +  %conv = sitofp <2 x i32> %v to <2 x double>
> > +  ret <2 x double> %conv
> > +}
> > +define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
> > +; CHECK-LABEL: f2:
> > +; CHECK: ushll.2d v0, v0, #0
> > +; CHECK-NEXT: ucvtf.2d v0, v0
> > +; CHECK-NEXT: ret
> > +  %conv = uitofp <2 x i32> %v to <2 x double>
> > +  ret <2 x double> %conv
> > +}
> > +
> > +; CHECK: autogen_SD19655
> > +; CHECK: scvtf
> > +; CHECK: ret
> > +define void @autogen_SD19655() {
> > +  %T = load <2 x i64>* undef
> > +  %F = sitofp <2 x i64> undef to <2 x float>
> > +  store <2 x float> %F, <2 x float>* undef
> > +  ret void
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/copy-tuple.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/copy-tuple.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/copy-tuple.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/copy-tuple.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,146 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
> > +
> > +; The main purpose of this test is to find out whether copyPhysReg can
> deal with
> > +; the memmove-like situation arising in tuples, where an early copy can
> clobber
> > +; the value needed by a later one if the tuples overlap.
> > +
> > +; We use dummy inline asm to force LLVM to generate a COPY between the
> registers
> > +; we want by clobbering all the others.
> > +
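
The "memmove-like situation" is really a copy-direction problem; a minimal
C++ model of what copyPhysReg has to get right (registers modelled as an int
array, not the real backend code -- note it matches the CHECK order in
test_D1D2_from_D0D1, which copies v2 from v1 before v1 from v0):

    #include <cstdio>

    // Copy Len consecutive D registers from SrcBase.. to DestBase.., picking
    // the direction that never reads a register it has already overwritten
    // (exactly the memmove rule).
    void copyTuple(int *Regs, int DestBase, int SrcBase, int Len) {
      if (DestBase > SrcBase)
        for (int i = Len - 1; i >= 0; --i)   // e.g. D1D2 <- D0D1: copy v2 first
          Regs[DestBase + i] = Regs[SrcBase + i];
      else
        for (int i = 0; i < Len; ++i)        // e.g. D0D1 <- D1D2: copy v0 first
          Regs[DestBase + i] = Regs[SrcBase + i];
    }

    int main() {
      int D[32] = {0};
      D[0] = 10;
      D[1] = 11;                                  // live values in D0 and D1
      copyTuple(D, /*DestBase=*/1, /*SrcBase=*/0, /*Len=*/2); // D1D2 <- D0D1
      std::printf("D1=%d D2=%d\n", D[1], D[2]);   // D1=10 D2=11, nothing clobbered
      return 0;
    }
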
> > +define void @test_D1D2_from_D0D1(i8* %addr) #0 {
> > +; CHECK-LABEL: test_D1D2_from_D0D1:
> > +; CHECK: orr.8b v2, v1
> > +; CHECK: orr.8b v1, v0
> > +entry:
> > +  %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
> > +  %vec = tail call { <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
> > +  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
> > +  tail call void asm sideeffect "",
> "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_D0D1_from_D1D2(i8* %addr) #0 {
> > +; CHECK-LABEL: test_D0D1_from_D1D2:
> > +; CHECK: orr.8b v0, v1
> > +; CHECK: orr.8b v1, v2
> > +entry:
> > +  %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
> > +  %vec = tail call { <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
> > +  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
> > +  tail call void asm sideeffect "",
> "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_D0D1_from_D31D0(i8* %addr) #0 {
> > +; CHECK-LABEL: test_D0D1_from_D31D0:
> > +; CHECK: orr.8b v1, v0
> > +; CHECK: orr.8b v0, v31
> > +entry:
> > +  %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
> > +  %vec = tail call { <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
> > +  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
> > +  tail call void asm sideeffect "",
> "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_D31D0_from_D0D1(i8* %addr) #0 {
> > +; CHECK-LABEL: test_D31D0_from_D0D1:
> > +; CHECK: orr.8b v31, v0
> > +; CHECK: orr.8b v0, v1
> > +entry:
> > +  %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
> > +  %vec = tail call { <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
> > +  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
> > +  tail call void asm sideeffect "",
> "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
> > +  tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 {
> > +; CHECK-LABEL: test_D2D3D4_from_D0D1D2:
> > +; CHECK: orr.8b v4, v2
> > +; CHECK: orr.8b v3, v1
> > +; CHECK: orr.8b v2, v0
> > +entry:
> > +  %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
> > +  %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
> > +  %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1
> > +  %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2
> > +
> > +  tail call void asm sideeffect "",
> "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, <8 x i8> %vec2, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x
> i8> %vec1, <8 x i8> %vec2, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 {
> > +; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3:
> > +; CHECK: orr.16b v0, v1
> > +; CHECK: orr.16b v1, v2
> > +; CHECK: orr.16b v2, v3
> > +entry:
> > +  %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
> > +  %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> }
> @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
> > +  %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
> > +  %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
> > +  %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
> > +  tail call void asm sideeffect "",
> "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x
> i8> %vec1, <16 x i8> %vec2, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x
> i8> %vec1, <16 x i8> %vec2, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 {
> > +; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1:
> > +; CHECK: orr.16b v4, v1
> > +; CHECK: orr.16b v3, v0
> > +; CHECK: orr.16b v2, v31
> > +; CHECK: orr.16b v1, v30
> > +  %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
> > +  %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
> > +  %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> %vec, 0
> > +  %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> %vec, 1
> > +  %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> %vec, 2
> > +  %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> %vec, 3
> > +
> > +  tail call void asm sideeffect "",
> "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"()
> > +  tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x
> i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
> > +
> > +  tail call void asm sideeffect "",
> "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
> > +  tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x
> i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
> > +  ret void
> > +}
> > +
> > +declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x
> i8>*)
> > +declare { <8 x i8>, <8 x i8>, <8 x i8> }
> @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*)
> > +declare { <16 x i8>, <16 x i8>, <16 x i8> }
> @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*)
> > +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
> @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*)
> > +
> > +declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
> > +declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x
> i8>, i8*)
> > +declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16
> x i8>, i8*)
> > +declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>, i8*)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/crc32.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/crc32.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/crc32.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/crc32.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,71 @@
> > +; RUN: llc -march=arm64 -o - %s | FileCheck %s
> > +
> > +define i32 @test_crc32b(i32 %cur, i8 %next) {
> > +; CHECK-LABEL: test_crc32b:
> > +; CHECK: crc32b w0, w0, w1
> > +  %bits = zext i8 %next to i32
> > +  %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32h(i32 %cur, i16 %next) {
> > +; CHECK-LABEL: test_crc32h:
> > +; CHECK: crc32h w0, w0, w1
> > +  %bits = zext i16 %next to i32
> > +  %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32w(i32 %cur, i32 %next) {
> > +; CHECK-LABEL: test_crc32w:
> > +; CHECK: crc32w w0, w0, w1
> > +  %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32x(i32 %cur, i64 %next) {
> > +; CHECK-LABEL: test_crc32x:
> > +; CHECK: crc32x w0, w0, x1
> > +  %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32cb(i32 %cur, i8 %next) {
> > +; CHECK-LABEL: test_crc32cb:
> > +; CHECK: crc32cb w0, w0, w1
> > +  %bits = zext i8 %next to i32
> > +  %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32ch(i32 %cur, i16 %next) {
> > +; CHECK-LABEL: test_crc32ch:
> > +; CHECK: crc32ch w0, w0, w1
> > +  %bits = zext i16 %next to i32
> > +  %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32cw(i32 %cur, i32 %next) {
> > +; CHECK-LABEL: test_crc32cw:
> > +; CHECK: crc32cw w0, w0, w1
> > +  %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next)
> > +  ret i32 %val
> > +}
> > +
> > +define i32 @test_crc32cx(i32 %cur, i64 %next) {
> > +; CHECK-LABEL: test_crc32cx:
> > +; CHECK: crc32cx w0, w0, x1
> > +  %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next)
> > +  ret i32 %val
> > +}
> > +
> > +declare i32 @llvm.arm64.crc32b(i32, i32)
> > +declare i32 @llvm.arm64.crc32h(i32, i32)
> > +declare i32 @llvm.arm64.crc32w(i32, i32)
> > +declare i32 @llvm.arm64.crc32x(i32, i64)
> > +
> > +declare i32 @llvm.arm64.crc32cb(i32, i32)
> > +declare i32 @llvm.arm64.crc32ch(i32, i32)
> > +declare i32 @llvm.arm64.crc32cw(i32, i32)
> > +declare i32 @llvm.arm64.crc32cx(i32, i64)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/crypto.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/crypto.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/crypto.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/crypto.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,135 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s
> > +
> > +declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8>
> %key)
> > +declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8>
> %key)
> > +declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data)
> > +declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data)
> > +
> > +define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) {
> > +; CHECK-LABEL: test_aese:
> > +; CHECK: aese.16b v0, v1
> > +  %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x
> i8> %key)
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) {
> > +; CHECK-LABEL: test_aesd:
> > +; CHECK: aesd.16b v0, v1
> > +  %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x
> i8> %key)
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <16 x i8> @test_aesmc(<16 x i8> %data) {
> > +; CHECK-LABEL: test_aesmc:
> > +; CHECK: aesmc.16b v0, v0
> > + %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data)
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <16 x i8> @test_aesimc(<16 x i8> %data) {
> > +; CHECK-LABEL: test_aesimc:
> > +; CHECK: aesimc.16b v0, v0
> > + %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data)
> > +  ret <16 x i8> %res
> > +}
> > +
> > +declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32
> %hash_e, <4 x i32> %wk)
> > +declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32
> %hash_e, <4 x i32> %wk)
> > +declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32
> %hash_e, <4 x i32> %wk)
> > +declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e)
> > +declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x
> i32> %wk4_7, <4 x i32> %wk8_11)
> > +declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x
> i32> %wk12_15)
> > +
> > +define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x
> i32> %wk) {
> > +; CHECK-LABEL: test_sha1c:
> > +; CHECK: fmov [[HASH_E:s[0-9]+]], w0
> > +; CHECK: sha1c.4s q0, [[HASH_E]], v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd,
> i32 %hash_e, <4 x i32> %wk)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +; <rdar://problem/14742333> Incomplete removal of unnecessary FMOV
> instructions in intrinsic SHA1
> > +define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32
> %hash_e, <4 x i32> %wk) {
> > +; CHECK-LABEL: test_sha1c_in_a_row:
> > +; CHECK: fmov [[HASH_E:s[0-9]+]], w0
> > +; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], [[HASH_E]], v1
> > +; CHECK-NOT: fmov
> > +; CHECK: sha1c.4s q0, s[[SHA1RES]], v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd,
> i32 %hash_e, <4 x i32> %wk)
> > +  %extract = extractelement <4 x i32> %res, i32 0
> > +  %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd,
> i32 %extract, <4 x i32> %wk)
> > +  ret <4 x i32> %res2
> > +}
> > +
> > +define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x
> i32> %wk) {
> > +; CHECK-LABEL: test_sha1p:
> > +; CHECK: fmov [[HASH_E:s[0-9]+]], w0
> > +; CHECK: sha1p.4s q0, [[HASH_E]], v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd,
> i32 %hash_e, <4 x i32> %wk)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x
> i32> %wk) {
> > +; CHECK-LABEL: test_sha1m:
> > +; CHECK: fmov [[HASH_E:s[0-9]+]], w0
> > +; CHECK: sha1m.4s q0, [[HASH_E]], v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd,
> i32 %hash_e, <4 x i32> %wk)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define i32 @test_sha1h(i32 %hash_e) {
> > +; CHECK-LABEL: test_sha1h:
> > +; CHECK: fmov [[HASH_E:s[0-9]+]], w0
> > +; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]]
> > +; CHECK: fmov w0, [[RES]]
> > +  %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e)
> > +  ret i32 %res
> > +}
> > +
> > +define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x
> i32> %wk8_11) {
> > +; CHECK-LABEL: test_sha1su0:
> > +; CHECK: sha1su0.4s v0, v1, v2
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4
> x i32> %wk4_7, <4 x i32> %wk8_11)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) {
> > +; CHECK-LABEL: test_sha1su1:
> > +; CHECK: sha1su1.4s v0, v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4
> x i32> %wk12_15)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x
> i32> %hash_efgh, <4 x i32> %wk)
> > +declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4
> x i32> %hash_abcd, <4 x i32> %wk)
> > +declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x
> i32> %w4_7)
> > +declare <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x
> i32> %w8_11, <4 x i32> %w12_15)
> > +
> > +define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32>
> %hash_efgh, <4 x i32> %wk) {
> > +; CHECK-LABEL: test_sha256h:
> > +; CHECK: sha256h.4s q0, q1, v2
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32>
> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32>
> %hash_abcd, <4 x i32> %wk) {
> > +; CHECK-LABEL: test_sha256h2:
> > +; CHECK: sha256h2.4s q0, q1, v2
> > +
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32>
> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) {
> > +; CHECK-LABEL: test_sha256su0:
> > +; CHECK: sha256su0.4s v0, v1
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3,
> <4 x i32> %w4_7)
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4
> x i32> %w12_15) {
> > +; CHECK-LABEL: test_sha256su1:
> > +; CHECK: sha256su1.4s v0, v1, v2
> > +  %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3,
> <4 x i32> %w8_11, <4 x i32> %w12_15)
> > +  ret <4 x i32> %res
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/cse.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/cse.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/cse.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/cse.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,59 @@
> > +; RUN: llc -O3 < %s | FileCheck %s
> > +target triple = "arm64-apple-ios"
> > +
> > +; rdar://12462006
> > +; CSE between "icmp reg reg" and "sub reg reg".
> > +; Both can be in the same basic block or in different basic blocks.
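
As a plain C++ illustration of why the test below only expects a single subs
(this models the idea, not the backend's optimizeCompare): one subtraction can
feed both the branch condition and the reused difference, so the separate
cmp/sub pair folds away:

    #include <cstdio>

    struct SubsResult {
      int Diff;       // the arithmetic result ("sub")
      bool Negative;  // the flag the branch consumes ("icmp slt" / b.ge)
    };

    // Models the single AArch64 "subs" the test expects: one operation yields
    // both the difference and the condition.
    SubsResult subs(int A, int B) {
      SubsResult R;
      R.Diff = A - B;
      R.Negative = R.Diff < 0;
      return R;
    }

    int main() {
      int Offset = 3, Size = 10;          // hypothetical inputs to @t1
      SubsResult R = subs(Offset, Size);
      if (R.Negative)                     // the "icmp slt" side
        std::printf("take the early-return path\n");
      else
        std::printf("reuse the difference: %d\n", R.Diff); // the "sub nsw" side
      return 0;
    }
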
> > +define i8* @t1(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: subs
> > +; CHECK-NOT: cmp
> > +; CHECK-NOT: sub
> > +; CHECK: b.ge
> > +; CHECK: sub
> > +; CHECK: sub
> > +; CHECK-NOT: sub
> > +; CHECK: ret
> > + %0 = load i32* %offset, align 4
> > + %cmp = icmp slt i32 %0, %size
> > + %s = sub nsw i32 %0, %size
> > + br i1 %cmp, label %return, label %if.end
> > +
> > +if.end:
> > + %sub = sub nsw i32 %0, %size
> > + %s2 = sub nsw i32 %s, %size
> > + %s3 = sub nsw i32 %sub, %s2
> > + store i32 %s3, i32* %offset, align 4
> > + %add.ptr = getelementptr inbounds i8* %base, i32 %sub
> > + br label %return
> > +
> > +return:
> > + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
> > + ret i8* %retval.0
> > +}
> > +
> > +; CSE between "icmp reg imm" and "sub reg imm".
> > +define i8* @t2(i8* %base, i32* nocapture %offset) nounwind {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: subs
> > +; CHECK-NOT: cmp
> > +; CHECK-NOT: sub
> > +; CHECK: b.lt
> > +; CHECK-NOT: sub
> > +; CHECK: ret
> > + %0 = load i32* %offset, align 4
> > + %cmp = icmp slt i32 %0, 1
> > + br i1 %cmp, label %return, label %if.end
> > +
> > +if.end:
> > + %sub = sub nsw i32 %0, 1
> > + store i32 %sub, i32* %offset, align 4
> > + %add.ptr = getelementptr inbounds i8* %base, i32 %sub
> > + br label %return
> > +
> > +return:
> > + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
> > + ret i8* %retval.0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/csel.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/csel.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/csel.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/csel.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,222 @@
> > +; RUN: llc -O3 < %s | FileCheck %s
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
> > +target triple = "arm64-unknown-unknown"
> > +
> > +; CHECK: foo1
> > +; CHECK: csinc w{{[0-9]+}}, w[[REG:[0-9]+]],
> > +; CHECK:                                     w[[REG]], eq
> > +define i32 @foo1(i32 %b, i32 %c) nounwind readnone ssp {
> > +entry:
> > +  %not.tobool = icmp ne i32 %c, 0
> > +  %add = zext i1 %not.tobool to i32
> > +  %b.add = add i32 %c, %b
> > +  %add1 = add i32 %b.add, %add
> > +  ret i32 %add1
> > +}
> > +
> > +; CHECK: foo2
> > +; CHECK: csneg w{{[0-9]+}}, w[[REG:[0-9]+]],
> > +; CHECK:                                     w[[REG]], eq
> > +define i32 @foo2(i32 %b, i32 %c) nounwind readnone ssp {
> > +entry:
> > +  %mul = sub i32 0, %b
> > +  %tobool = icmp eq i32 %c, 0
> > +  %b.mul = select i1 %tobool, i32 %b, i32 %mul
> > +  %add = add nsw i32 %b.mul, %c
> > +  ret i32 %add
> > +}
> > +
> > +; CHECK: foo3
> > +; CHECK: csinv w{{[0-9]+}}, w[[REG:[0-9]+]],
> > +; CHECK:                                     w[[REG]], eq
> > +define i32 @foo3(i32 %b, i32 %c) nounwind readnone ssp {
> > +entry:
> > +  %not.tobool = icmp ne i32 %c, 0
> > +  %xor = sext i1 %not.tobool to i32
> > +  %b.xor = xor i32 %xor, %b
> > +  %add = add nsw i32 %b.xor, %c
> > +  ret i32 %add
> > +}
> > +
> > +; rdar://11632325
> > +define i32 @foo4(i32 %a) nounwind ssp {
> > +; CHECK: foo4
> > +; CHECK: csneg
> > +; CHECK-NEXT: ret
> > +  %cmp = icmp sgt i32 %a, -1
> > +  %neg = sub nsw i32 0, %a
> > +  %cond = select i1 %cmp, i32 %a, i32 %neg
> > +  ret i32 %cond
> > +}
> > +
> > +define i32 @foo5(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: foo5
> > +; CHECK: subs
> > +; CHECK-NEXT: csneg
> > +; CHECK-NEXT: ret
> > +  %sub = sub nsw i32 %a, %b
> > +  %cmp = icmp sgt i32 %sub, -1
> > +  %sub3 = sub nsw i32 0, %sub
> > +  %cond = select i1 %cmp, i32 %sub, i32 %sub3
> > +  ret i32 %cond
> > +}
> > +
> > +; Make sure we can handle a branch instruction in optimizeCompare.
> > +define i32 @foo6(i32 %a, i32 %b) nounwind ssp {
> > +; CHECK: foo6
> > +; CHECK: b
> > +  %sub = sub nsw i32 %a, %b
> > +  %cmp = icmp sgt i32 %sub, 0
> > +  br i1 %cmp, label %l.if, label %l.else
> > +
> > +l.if:
> > +  ret i32 1
> > +
> > +l.else:
> > +  ret i32 %sub
> > +}
> > +
> > +; If CPSR is used multiple times and the V flag is used, we don't remove the cmp.
> > +define i32 @foo7(i32 %a, i32 %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: foo7:
> > +; CHECK: sub
> > +; CHECK-NEXT: adds
> > +; CHECK-NEXT: csneg
> > +; CHECK-NEXT: b
> > +  %sub = sub nsw i32 %a, %b
> > +  %cmp = icmp sgt i32 %sub, -1
> > +  %sub3 = sub nsw i32 0, %sub
> > +  %cond = select i1 %cmp, i32 %sub, i32 %sub3
> > +  br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then:
> > +  %cmp2 = icmp slt i32 %sub, -1
> > +  %sel = select i1 %cmp2, i32 %cond, i32 %a
> > +  ret i32 %sel
> > +
> > +if.else:
> > +  ret i32 %cond
> > +}
> > +
> > +define i32 @foo8(i32 %v, i32 %a, i32 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: foo8:
> > +; CHECK: cmp w0, #0
> > +; CHECK: csinv w0, w1, w2, ne
> > +  %tobool = icmp eq i32 %v, 0
> > +  %neg = xor i32 -1, %b
> > +  %cond = select i1 %tobool, i32 %neg, i32 %a
> > +  ret i32 %cond
> > +}
> > +
> > +define i32 @foo9(i32 %v) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo9:
> > +; CHECK: cmp w0, #0
> > +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
> > +; CHECK: csinv w0, w[[REG]], w[[REG]], ne
> > +  %tobool = icmp ne i32 %v, 0
> > +  %cond = select i1 %tobool, i32 4, i32 -5
> > +  ret i32 %cond
> > +}
> > +
> > +define i64 @foo10(i64 %v) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo10:
> > +; CHECK: cmp x0, #0
> > +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4
> > +; CHECK: csinv x0, x[[REG]], x[[REG]], ne
> > +  %tobool = icmp ne i64 %v, 0
> > +  %cond = select i1 %tobool, i64 4, i64 -5
> > +  ret i64 %cond
> > +}
> > +
> > +define i32 @foo11(i32 %v) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo11:
> > +; CHECK: cmp w0, #0
> > +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
> > +; CHECK: csneg w0, w[[REG]], w[[REG]], ne
> > +  %tobool = icmp ne i32 %v, 0
> > +  %cond = select i1 %tobool, i32 4, i32 -4
> > +  ret i32 %cond
> > +}
> > +
> > +define i64 @foo12(i64 %v) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo12:
> > +; CHECK: cmp x0, #0
> > +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4
> > +; CHECK: csneg x0, x[[REG]], x[[REG]], ne
> > +  %tobool = icmp ne i64 %v, 0
> > +  %cond = select i1 %tobool, i64 4, i64 -4
> > +  ret i64 %cond
> > +}
> > +
> > +define i32 @foo13(i32 %v, i32 %a, i32 %b) nounwind readnone optsize ssp
> {
> > +entry:
> > +; CHECK-LABEL: foo13:
> > +; CHECK: cmp w0, #0
> > +; CHECK: csneg w0, w1, w2, ne
> > +  %tobool = icmp eq i32 %v, 0
> > +  %sub = sub i32 0, %b
> > +  %cond = select i1 %tobool, i32 %sub, i32 %a
> > +  ret i32 %cond
> > +}
> > +
> > +define i64 @foo14(i64 %v, i64 %a, i64 %b) nounwind readnone optsize ssp
> {
> > +entry:
> > +; CHECK-LABEL: foo14:
> > +; CHECK: cmp x0, #0
> > +; CHECK: csneg x0, x1, x2, ne
> > +  %tobool = icmp eq i64 %v, 0
> > +  %sub = sub i64 0, %b
> > +  %cond = select i1 %tobool, i64 %sub, i64 %a
> > +  ret i64 %cond
> > +}
> > +
> > +define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo15:
> > +; CHECK: cmp w0, w1
> > +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
> > +; CHECK: csinc w0, w[[REG]], w[[REG]], le
> > +  %cmp = icmp sgt i32 %a, %b
> > +  %. = select i1 %cmp, i32 2, i32 1
> > +  ret i32 %.
> > +}
> > +
> > +define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo16:
> > +; CHECK: cmp w0, w1
> > +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
> > +; CHECK: csinc w0, w[[REG]], w[[REG]], gt
> > +  %cmp = icmp sgt i32 %a, %b
> > +  %. = select i1 %cmp, i32 1, i32 2
> > +  ret i32 %.
> > +}
> > +
> > +define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo17:
> > +; CHECK: cmp x0, x1
> > +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1
> > +; CHECK: csinc x0, x[[REG]], x[[REG]], le
> > +  %cmp = icmp sgt i64 %a, %b
> > +  %. = select i1 %cmp, i64 2, i64 1
> > +  ret i64 %.
> > +}
> > +
> > +define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp {
> > +entry:
> > +; CHECK-LABEL: foo18:
> > +; CHECK: cmp x0, x1
> > +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1
> > +; CHECK: csinc x0, x[[REG]], x[[REG]], gt
> > +  %cmp = icmp sgt i64 %a, %b
> > +  %. = select i1 %cmp, i64 1, i64 2
> > +  ret i64 %.
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/cvt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/cvt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/cvt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/cvt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,401 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +;
> > +; Floating-point scalar convert to signed integer (to nearest with ties
> to away)
> > +;
> > +define i32 @fcvtas_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtas_1w1s:
> > +;CHECK: fcvtas w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtas_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtas_1x1s:
> > +;CHECK: fcvtas x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtas_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtas_1w1d:
> > +;CHECK: fcvtas w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtas_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtas_1x1d:
> > +;CHECK: fcvtas x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to unsigned integer
> > +;
> > +define i32 @fcvtau_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtau_1w1s:
> > +;CHECK: fcvtau w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtau_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtau_1x1s:
> > +;CHECK: fcvtau x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtau_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtau_1w1d:
> > +;CHECK: fcvtau w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtau_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtau_1x1d:
> > +;CHECK: fcvtau x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to signed integer (toward -Inf)
> > +;
> > +define i32 @fcvtms_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtms_1w1s:
> > +;CHECK: fcvtms w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtms_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtms_1x1s:
> > +;CHECK: fcvtms x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtms_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtms_1w1d:
> > +;CHECK: fcvtms w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtms_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtms_1x1d:
> > +;CHECK: fcvtms x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to unsigned integer (toward -Inf)
> > +;
> > +define i32 @fcvtmu_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_1w1s:
> > +;CHECK: fcvtmu w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtmu_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_1x1s:
> > +;CHECK: fcvtmu x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtmu_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_1w1d:
> > +;CHECK: fcvtmu w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtmu_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_1x1d:
> > +;CHECK: fcvtmu x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to signed integer (to nearest with ties
> to even)
> > +;
> > +define i32 @fcvtns_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtns_1w1s:
> > +;CHECK: fcvtns w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtns_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtns_1x1s:
> > +;CHECK: fcvtns x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtns_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtns_1w1d:
> > +;CHECK: fcvtns w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtns_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtns_1x1d:
> > +;CHECK: fcvtns x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to unsigned integer (to nearest with
> ties to even)
> > +;
> > +define i32 @fcvtnu_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_1w1s:
> > +;CHECK: fcvtnu w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtnu_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_1x1s:
> > +;CHECK: fcvtnu x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtnu_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_1w1d:
> > +;CHECK: fcvtnu w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtnu_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_1x1d:
> > +;CHECK: fcvtnu x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to signed integer (toward +Inf)
> > +;
> > +define i32 @fcvtps_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtps_1w1s:
> > +;CHECK: fcvtps w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtps_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtps_1x1s:
> > +;CHECK: fcvtps x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtps_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtps_1w1d:
> > +;CHECK: fcvtps w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtps_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtps_1x1d:
> > +;CHECK: fcvtps x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to unsigned integer (toward +Inf)
> > +;
> > +define i32 @fcvtpu_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_1w1s:
> > +;CHECK: fcvtpu w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtpu_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_1x1s:
> > +;CHECK: fcvtpu x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtpu_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_1w1d:
> > +;CHECK: fcvtpu w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtpu_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_1x1d:
> > +;CHECK: fcvtpu x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone
> > +
> > +;
> > +;  Floating-point scalar convert to signed integer (toward zero)
> > +;
> > +define i32 @fcvtzs_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_1w1s:
> > +;CHECK: fcvtzs w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtzs_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_1x1s:
> > +;CHECK: fcvtzs x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtzs_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_1w1d:
> > +;CHECK: fcvtzs w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtzs_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_1x1d:
> > +;CHECK: fcvtzs x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) nounwind readnone
> > +
> > +;
> > +; Floating-point scalar convert to unsigned integer (toward zero)
> > +;
> > +define i32 @fcvtzu_1w1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_1w1s:
> > +;CHECK: fcvtzu w0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtzu_1x1s(float %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_1x1s:
> > +;CHECK: fcvtzu x0, s0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +define i32 @fcvtzu_1w1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_1w1d:
> > +;CHECK: fcvtzu w0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A)
> > +       ret i32 %tmp3
> > +}
> > +
> > +define i64 @fcvtzu_1x1d(double %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_1x1d:
> > +;CHECK: fcvtzu x0, d0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A)
> > +       ret i64 %tmp3
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone
> > +declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone
> > +declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/dagcombiner-convergence.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/dagcombiner-convergence.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/dagcombiner-convergence.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/dagcombiner-convergence.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,19 @@
> > +; RUN: llc < %s -o /dev/null
> > +; rdar://10795250
> > +; DAGCombiner should converge.
> > +
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
> > +target triple = "arm64-apple-macosx10.8.0"
> > +
> > +define i64 @foo(i128 %Params.coerce, i128 %SelLocs.coerce) {
> > +entry:
> > +  %tmp = lshr i128 %Params.coerce, 61
> > +  %.tr38.i = trunc i128 %tmp to i64
> > +  %mul.i = and i64 %.tr38.i, 4294967288
> > +  %tmp1 = lshr i128 %SelLocs.coerce, 62
> > +  %.tr.i = trunc i128 %tmp1 to i64
> > +  %mul7.i = and i64 %.tr.i, 4294967292
> > +  %add.i = add i64 %mul7.i, %mul.i
> > +  %conv.i.i = and i64 %add.i, 4294967292
> > +  ret i64 %conv.i.i
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/dagcombiner-load-slicing.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/dagcombiner-load-slicing.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/dagcombiner-load-slicing.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/dagcombiner-load-slicing.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,102 @@
> > +; RUN: llc -mtriple arm64-apple-ios -O3 -o - < %s | FileCheck %s
> > +; <rdar://problem/14477220>
> > +
> > +%class.Complex = type { float, float }
> > +%class.Complex_int = type { i32, i32 }
> > +%class.Complex_long = type { i64, i64 }
> > +
> > +; CHECK-LABEL: @test
> > +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3
> > +; CHECK: ldp [[CPLX1_I:s[0-9]+]], [[CPLX1_R:s[0-9]+]], {{\[}}[[BASE]]]
> > +; CHECK: ldp [[CPLX2_I:s[0-9]+]], [[CPLX2_R:s[0-9]+]], {{\[}}[[BASE]],
> #64]
> > +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
> > +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
> > +; CHECK: ret
> > +define void @test(%class.Complex* nocapture %out, i64 %out_start) {
> > +entry:
> > +  %arrayidx = getelementptr inbounds %class.Complex* %out, i64
> %out_start
> > +  %0 = bitcast %class.Complex* %arrayidx to i64*
> > +  %1 = load i64* %0, align 4
> > +  %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
> > +  %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
> > +  %t0.sroa.2.0.extract.shift = lshr i64 %1, 32
> > +  %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to
> i32
> > +  %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float
> > +  %add = add i64 %out_start, 8
> > +  %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add
> > +  %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0
> > +  %4 = load float* %i.i, align 4
> > +  %add.i = fadd float %4, %2
> > +  %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef,
> float %add.i, i32 0
> > +  %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1
> > +  %5 = load float* %r.i, align 4
> > +  %add5.i = fadd float %5, %3
> > +  %retval.sroa.0.4.vec.insert.i = insertelement <2 x float>
> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
> > +  %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x
> float>*
> > +  store <2 x float> %retval.sroa.0.4.vec.insert.i, <2 x float>*
> %ref.tmp.sroa.0.0.cast, align 4
> > +  ret void
> > +}
> > +
> > +; CHECK-LABEL: @test_int
> > +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3
> > +; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]]
> > +; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]],
> #64]
> > +; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
> > +; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
> > +; CHECK: ret
> > +define void @test_int(%class.Complex_int* nocapture %out, i64
> %out_start) {
> > +entry:
> > +  %arrayidx = getelementptr inbounds %class.Complex_int* %out, i64
> %out_start
> > +  %0 = bitcast %class.Complex_int* %arrayidx to i64*
> > +  %1 = load i64* %0, align 4
> > +  %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
> > +  %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to i32
> > +  %t0.sroa.2.0.extract.shift = lshr i64 %1, 32
> > +  %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to
> i32
> > +  %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to i32
> > +  %add = add i64 %out_start, 8
> > +  %arrayidx2 = getelementptr inbounds %class.Complex_int* %out, i64 %add
> > +  %i.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0,
> i32 0
> > +  %4 = load i32* %i.i, align 4
> > +  %add.i = add i32 %4, %2
> > +  %retval.sroa.0.0.vec.insert.i = insertelement <2 x i32> undef, i32
> %add.i, i32 0
> > +  %r.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0,
> i32 1
> > +  %5 = load i32* %r.i, align 4
> > +  %add5.i = add i32 %5, %3
> > +  %retval.sroa.0.4.vec.insert.i = insertelement <2 x i32>
> %retval.sroa.0.0.vec.insert.i, i32 %add5.i, i32 1
> > +  %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_int* %arrayidx to <2
> x i32>*
> > +  store <2 x i32> %retval.sroa.0.4.vec.insert.i, <2 x i32>*
> %ref.tmp.sroa.0.0.cast, align 4
> > +  ret void
> > +}
> > +
> > +; CHECK-LABEL: @test_long
> > +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4
> > +; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]]
> > +; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]],
> #128]
> > +; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
> > +; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
> > +; CHECK: ret
> > +define void @test_long(%class.Complex_long* nocapture %out, i64
> %out_start) {
> > +entry:
> > +  %arrayidx = getelementptr inbounds %class.Complex_long* %out, i64
> %out_start
> > +  %0 = bitcast %class.Complex_long* %arrayidx to i128*
> > +  %1 = load i128* %0, align 4
> > +  %t0.sroa.0.0.extract.trunc = trunc i128 %1 to i64
> > +  %2 = bitcast i64 %t0.sroa.0.0.extract.trunc to i64
> > +  %t0.sroa.2.0.extract.shift = lshr i128 %1, 64
> > +  %t0.sroa.2.0.extract.trunc = trunc i128 %t0.sroa.2.0.extract.shift to
> i64
> > +  %3 = bitcast i64 %t0.sroa.2.0.extract.trunc to i64
> > +  %add = add i64 %out_start, 8
> > +  %arrayidx2 = getelementptr inbounds %class.Complex_long* %out, i64
> %add
> > +  %i.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0,
> i32 0
> > +  %4 = load i64* %i.i, align 4
> > +  %add.i = add i64 %4, %2
> > +  %retval.sroa.0.0.vec.insert.i = insertelement <2 x i64> undef, i64
> %add.i, i32 0
> > +  %r.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0,
> i32 1
> > +  %5 = load i64* %r.i, align 4
> > +  %add5.i = add i64 %5, %3
> > +  %retval.sroa.0.4.vec.insert.i = insertelement <2 x i64>
> %retval.sroa.0.0.vec.insert.i, i64 %add5.i, i32 1
> > +  %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_long* %arrayidx to <2
> x i64>*
> > +  store <2 x i64> %retval.sroa.0.4.vec.insert.i, <2 x i64>*
> %ref.tmp.sroa.0.0.cast, align 4
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/dup.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/dup.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/dup.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/dup.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,322 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -asm-verbose=false | FileCheck %s
> > +
> > +define <8 x i8> @v_dup8(i8 %A) nounwind {
> > +;CHECK-LABEL: v_dup8:
> > +;CHECK: dup.8b
> > +       %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
> > +       %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
> > +       %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
> > +       %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
> > +       %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
> > +       %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
> > +       %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
> > +       %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
> > +       ret <8 x i8> %tmp8
> > +}
> > +
> > +define <4 x i16> @v_dup16(i16 %A) nounwind {
> > +;CHECK-LABEL: v_dup16:
> > +;CHECK: dup.4h
> > +       %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
> > +       %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
> > +       %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
> > +       %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @v_dup32(i32 %A) nounwind {
> > +;CHECK-LABEL: v_dup32:
> > +;CHECK: dup.2s
> > +       %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
> > +       %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
> > +       ret <2 x i32> %tmp2
> > +}
> > +
> > +define <2 x float> @v_dupfloat(float %A) nounwind {
> > +;CHECK-LABEL: v_dupfloat:
> > +;CHECK: dup.2s
> > +       %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32
> 0
> > +       %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
> > +       ret <2 x float> %tmp2
> > +}
> > +
> > +define <16 x i8> @v_dupQ8(i8 %A) nounwind {
> > +;CHECK-LABEL: v_dupQ8:
> > +;CHECK: dup.16b
> > +       %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
> > +       %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
> > +       %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
> > +       %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
> > +       %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
> > +       %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
> > +       %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
> > +       %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
> > +       %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
> > +       %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
> > +       %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
> > +       %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
> > +       %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
> > +       %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
> > +       %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
> > +       %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
> > +       ret <16 x i8> %tmp16
> > +}
> > +
> > +define <8 x i16> @v_dupQ16(i16 %A) nounwind {
> > +;CHECK-LABEL: v_dupQ16:
> > +;CHECK: dup.8h
> > +       %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
> > +       %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
> > +       %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
> > +       %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
> > +       %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
> > +       %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
> > +       %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
> > +       %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
> > +       ret <8 x i16> %tmp8
> > +}
> > +
> > +define <4 x i32> @v_dupQ32(i32 %A) nounwind {
> > +;CHECK-LABEL: v_dupQ32:
> > +;CHECK: dup.4s
> > +       %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
> > +       %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
> > +       %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
> > +       %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <4 x float> @v_dupQfloat(float %A) nounwind {
> > +;CHECK-LABEL: v_dupQfloat:
> > +;CHECK: dup.4s
> > +       %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32
> 0
> > +       %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
> > +       %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
> > +       %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
> > +       ret <4 x float> %tmp4
> > +}
> > +
> > +; Check to make sure it works with shuffles, too.
> > +
> > +define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledup8:
> > +;CHECK: dup.8b
> > +       %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> zeroinitializer
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledup16:
> > +;CHECK: dup.4h
> > +       %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
> > +       %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x
> i32> zeroinitializer
> > +       ret <4 x i16> %tmp2
> > +}
> > +
> > +define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledup32:
> > +;CHECK: dup.2s
> > +       %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
> > +       %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x
> i32> zeroinitializer
> > +       ret <2 x i32> %tmp2
> > +}
> > +
> > +define <2 x float> @v_shuffledupfloat(float %A) nounwind {
> > +;CHECK-LABEL: v_shuffledupfloat:
> > +;CHECK: dup.2s
> > +       %tmp1 = insertelement <2 x float> undef, float %A, i32 0
> > +       %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x
> i32> zeroinitializer
> > +       ret <2 x float> %tmp2
> > +}
> > +
> > +define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledupQ8:
> > +;CHECK: dup.16b
> > +       %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
> > +       %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x
> i32> zeroinitializer
> > +       ret <16 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledupQ16:
> > +;CHECK: dup.8h
> > +       %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
> > +       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x
> i32> zeroinitializer
> > +       ret <8 x i16> %tmp2
> > +}
> > +
> > +define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
> > +;CHECK-LABEL: v_shuffledupQ32:
> > +;CHECK: dup.4s
> > +       %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
> > +       %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x
> i32> zeroinitializer
> > +       ret <4 x i32> %tmp2
> > +}
> > +
> > +define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
> > +;CHECK-LABEL: v_shuffledupQfloat:
> > +;CHECK: dup.4s
> > +       %tmp1 = insertelement <4 x float> undef, float %A, i32 0
> > +       %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x
> i32> zeroinitializer
> > +       ret <4 x float> %tmp2
> > +}
> > +
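
The two groups above cover the two common ways a splat reaches the backend: building
the vector one insertelement at a time, and the canonical insert-plus-shuffle idiom.
For reference, the latter boils down to this shape (a generic sketch mirroring the
v_shuffledup tests; the function name is made up):

  define <4 x i32> @splat(i32 %x) {
    %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
    %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
    ret <4 x i32> %splat
  }

and both forms are expected to select to a single dup from the scalar register, as
the CHECK lines assert.
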
> > +define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: vduplane8:
> > +;CHECK: dup.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: vduplane16:
> > +;CHECK: dup.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x
> i32> < i32 1, i32 1, i32 1, i32 1 >
> > +       ret <4 x i16> %tmp2
> > +}
> > +
> > +define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: vduplane32:
> > +;CHECK: dup.2s
> > +       %tmp1 = load <2 x i32>* %A
> > +       %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x
> i32> < i32 1, i32 1 >
> > +       ret <2 x i32> %tmp2
> > +}
> > +
> > +define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
> > +;CHECK-LABEL: vduplanefloat:
> > +;CHECK: dup.2s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x
> i32> < i32 1, i32 1 >
> > +       ret <2 x float> %tmp2
> > +}
> > +
> > +define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: vduplaneQ8:
> > +;CHECK: dup.16b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32>
> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
> > +       ret <16 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: vduplaneQ16:
> > +;CHECK: dup.8h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x
> i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
> > +       ret <8 x i16> %tmp2
> > +}
> > +
> > +define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: vduplaneQ32:
> > +;CHECK: dup.4s
> > +       %tmp1 = load <2 x i32>* %A
> > +       %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x
> i32> < i32 1, i32 1, i32 1, i32 1 >
> > +       ret <4 x i32> %tmp2
> > +}
> > +
> > +define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
> > +;CHECK-LABEL: vduplaneQfloat:
> > +;CHECK: dup.4s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x
> i32> < i32 1, i32 1, i32 1, i32 1 >
> > +       ret <4 x float> %tmp2
> > +}
> > +
> > +define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
> > +;CHECK-LABEL: foo:
> > +;CHECK: dup.2d
> > +entry:
> > +  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x
> i32> <i32 1, i32 1>
> > +  ret <2 x i64> %0
> > +}
> > +
> > +define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
> > +;CHECK-LABEL: bar:
> > +;CHECK: dup.2d
> > +entry:
> > +  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x
> i32> <i32 0, i32 0>
> > +  ret <2 x i64> %0
> > +}
> > +
> > +define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind
> readnone {
> > +;CHECK-LABEL: baz:
> > +;CHECK: dup.2d
> > +entry:
> > +  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef,
> <2 x i32> <i32 1, i32 1>
> > +  ret <2 x double> %0
> > +}
> > +
> > +define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind
> readnone {
> > +;CHECK-LABEL: qux:
> > +;CHECK: dup.2d
> > +entry:
> > +  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef,
> <2 x i32> <i32 0, i32 0>
> > +  ret <2 x double> %0
> > +}
> > +
> > +define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone  {
> > +; CHECK-LABEL: f:
> > +; CHECK-NEXT: fmov s0, w0
> > +; CHECK-NEXT: ins.s v0[1], w1
> > +; CHECK-NEXT: ret
> > +  %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
> > +  %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
> > +  ret <2 x i32> %vecinit1
> > +}
> > +
> > +define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone  {
> > +; CHECK-LABEL: g:
> > +; CHECK-NEXT: fmov s0, w0
> > +; CHECK-NEXT: ins.s v0[1], w1
> > +; CHECK-NEXT: ins.s v0[2], w1
> > +; CHECK-NEXT: ins.s v0[3], w0
> > +; CHECK-NEXT: ret
> > +  %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
> > +  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
> > +  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
> > +  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3
> > +  ret <4 x i32> %vecinit3
> > +}
> > +
> > +define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone  {
> > +; CHECK-LABEL: h:
> > +; CHECK-NEXT: fmov d0, x0
> > +; CHECK-NEXT: ins.d v0[1], x1
> > +; CHECK-NEXT: ret
> > +  %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
> > +  %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
> > +  ret <2 x i64> %vecinit1
> > +}
> > +
> > +; We used to spot this as a BUILD_VECTOR implementable by dup, but
> assumed that
> > +; the single value needed was of the same type as the vector. This is
> false if
> > +; the scalar corresponding to the vector type is illegal (e.g. a <4 x
> i16>
> > +; BUILD_VECTOR will have an i32 as its source). In that case, the
> operation is
> > +; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
> > +define <4 x i16> @test_build_illegal(<4 x i32> %in) {
> > +; CHECK-LABEL: test_build_illegal:
> > +; CHECK: umov.s [[WTMP:w[0-9]+]], v0[3]
> > +; CHECK: dup.4h v0, [[WTMP]]
> > +  %val = extractelement <4 x i32> %in, i32 3
> > +  %smallval = trunc i32 %val to i16
> > +  %vec = insertelement <4 x i16> undef, i16 %smallval, i32 3
> > +
> > +  ret <4 x i16> %vec
> > +}
> > +
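
For contrast with test_build_illegal above: when the element type is already legal,
the same build does not need the w-register round trip. A hypothetical counterpart
(not part of the committed test) would look like:

  define <4 x i32> @build_legal(<4 x i32> %in) {
    %val = extractelement <4 x i32> %in, i32 3
    %vec = insertelement <4 x i32> undef, i32 %val, i32 3
    ret <4 x i32> %vec
  }

Here no truncation is involved, so there is no umov/dup pair to get wrong; whether
it becomes a lane dup or is folded away entirely is up to the combiner.
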
> > +; We used to inherit an already extract_subvectored v4i16 from
> > +; SelectionDAGBuilder here. We then added a DUPLANE on top of that,
> preventing
> > +; the formation of an indexed-by-7 MLS.
> > +define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16>
> %v) #0 {
> > +; CHECK-LABEL: test_high_splat:
> > +; CHECK: mls.4h v0, v1, v2[7]
> > +entry:
> > +  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32>
> <i32 7, i32 7, i32 7, i32 7>
> > +  %mul = mul <4 x i16> %shuffle, %b
> > +  %sub = sub <4 x i16> %a, %mul
> > +  ret <4 x i16> %sub
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/early-ifcvt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/early-ifcvt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/early-ifcvt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/early-ifcvt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,423 @@
> > +; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
> > +target triple = "arm64-apple-macosx"
> > +
> > +; CHECK: mm2
> > +define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly
> ssp {
> > +entry:
> > +  br label %do.body
> > +
> > +; CHECK: do.body
> > +; Loop body has no branches before the backedge.
> > +; CHECK-NOT: LBB
> > +do.body:
> > +  %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
> > +  %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
> > +  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
> > +  %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
> > +  %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
> > +  %0 = load i32* %p.addr.0, align 4
> > +  %cmp = icmp sgt i32 %0, %max.0
> > +  br i1 %cmp, label %do.cond, label %if.else
> > +
> > +if.else:
> > +  %cmp1 = icmp slt i32 %0, %min.0
> > +  %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
> > +  br label %do.cond
> > +
> > +do.cond:
> > +  %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
> > +  %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
> > +; CHECK: cbnz
> > +  %dec = add i32 %n.addr.0, -1
> > +  %tobool = icmp eq i32 %dec, 0
> > +  br i1 %tobool, label %do.end, label %do.body
> > +
> > +do.end:
> > +  %sub = sub nsw i32 %max.1, %min.1
> > +  ret i32 %sub
> > +}
> > +
> > +; CHECK-LABEL: fold_inc_true_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csinc w0, w1, w0, eq
> > +; CHECK-NEXT: ret
> > +define i32 @fold_inc_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %inc = add nsw i32 %x, 1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %y, %eq_bb ], [ %inc, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inc_true_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csinc x0, x1, x0, eq
> > +; CHECK-NEXT: ret
> > +define i64 @fold_inc_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %inc = add nsw i64 %x, 1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %y, %eq_bb ], [ %inc, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inc_false_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csinc w0, w1, w0, ne
> > +; CHECK-NEXT: ret
> > +define i32 @fold_inc_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %inc = add nsw i32 %x, 1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %inc, %eq_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inc_false_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csinc x0, x1, x0, ne
> > +; CHECK-NEXT: ret
> > +define i64 @fold_inc_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %inc = add nsw i64 %x, 1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %inc, %eq_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inv_true_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csinv w0, w1, w0, eq
> > +; CHECK-NEXT: ret
> > +define i32 @fold_inv_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %inv = xor i32 %x, -1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %y, %eq_bb ], [ %inv, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inv_true_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csinv x0, x1, x0, eq
> > +; CHECK-NEXT: ret
> > +define i64 @fold_inv_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %inv = xor i64 %x, -1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %y, %eq_bb ], [ %inv, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inv_false_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csinv w0, w1, w0, ne
> > +; CHECK-NEXT: ret
> > +define i32 @fold_inv_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %inv = xor i32 %x, -1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %inv, %eq_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_inv_false_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csinv x0, x1, x0, ne
> > +; CHECK-NEXT: ret
> > +define i64 @fold_inv_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %inv = xor i64 %x, -1
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %inv, %eq_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_neg_true_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csneg w0, w1, w0, eq
> > +; CHECK-NEXT: ret
> > +define i32 @fold_neg_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %neg = sub nsw i32 0, %x
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %y, %eq_bb ], [ %neg, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_neg_true_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csneg x0, x1, x0, eq
> > +; CHECK-NEXT: ret
> > +define i64 @fold_neg_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %neg = sub nsw i64 0, %x
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %y, %eq_bb ], [ %neg, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_neg_false_32:
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #1
> > +; CHECK-NEXT: csneg w0, w1, w0, ne
> > +; CHECK-NEXT: ret
> > +define i32 @fold_neg_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 1
> > +  %neg = sub nsw i32 0, %x
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %neg, %eq_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK-LABEL: fold_neg_false_64:
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #1
> > +; CHECK-NEXT: csneg x0, x1, x0, ne
> > +; CHECK-NEXT: ret
> > +define i64 @fold_neg_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 1
> > +  %neg = sub nsw i64 0, %x
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %neg, %eq_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK: cbnz_32
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #0
> > +; CHECK-NEXT: csel w0, w1, w0, ne
> > +; CHECK-NEXT: ret
> > +define i32 @cbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i32 %c, 0
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK: cbnz_64
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #0
> > +; CHECK-NEXT: csel x0, x1, x0, ne
> > +; CHECK-NEXT: ret
> > +define i64 @cbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp eq i64 %c, 0
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK: cbz_32
> > +; CHECK: {{subs.*wzr,|cmp}} w2, #0
> > +; CHECK-NEXT: csel w0, w1, w0, eq
> > +; CHECK-NEXT: ret
> > +define i32 @cbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp ne i32 %c, 0
> > +  br i1 %tobool, label %ne_bb, label %done
> > +
> > +ne_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK: cbz_64
> > +; CHECK: {{subs.*xzr,|cmp}} x2, #0
> > +; CHECK-NEXT: csel x0, x1, x0, eq
> > +; CHECK-NEXT: ret
> > +define i64 @cbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %tobool = icmp ne i64 %c, 0
> > +  br i1 %tobool, label %ne_bb, label %done
> > +
> > +ne_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK: tbnz_32
> > +; CHECK: {{ands.*xzr,|tst}} x2, #0x80
> > +; CHECK-NEXT: csel w0, w1, w0, ne
> > +; CHECK-NEXT: ret
> > +define i32 @tbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %mask = and i32 %c, 128
> > +  %tobool = icmp eq i32 %mask, 0
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK: tbnz_64
> > +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
> > +; CHECK-NEXT: csel x0, x1, x0, ne
> > +; CHECK-NEXT: ret
> > +define i64 @tbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %mask = and i64 %c, 9223372036854775808
> > +  %tobool = icmp eq i64 %mask, 0
> > +  br i1 %tobool, label %eq_bb, label %done
> > +
> > +eq_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; CHECK: tbz_32
> > +; CHECK: {{ands.*xzr,|tst}} x2, #0x80
> > +; CHECK-NEXT: csel w0, w1, w0, eq
> > +; CHECK-NEXT: ret
> > +define i32 @tbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
> > +entry:
> > +  %mask = and i32 %c, 128
> > +  %tobool = icmp ne i32 %mask, 0
> > +  br i1 %tobool, label %ne_bb, label %done
> > +
> > +ne_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
> > +  ret i32 %cond
> > +}
> > +
> > +; CHECK: tbz_64
> > +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
> > +; CHECK-NEXT: csel x0, x1, x0, eq
> > +; CHECK-NEXT: ret
> > +define i64 @tbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
> > +entry:
> > +  %mask = and i64 %c, 9223372036854775808
> > +  %tobool = icmp ne i64 %mask, 0
> > +  br i1 %tobool, label %ne_bb, label %done
> > +
> > +ne_bb:
> > +  br label %done
> > +
> > +done:
> > +  %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
> > +  ret i64 %cond
> > +}
> > +
> > +; This function from 175.vpr folds an ADDWri into a CSINC.
> > +; Remember to clear the kill flag on the ADDWri.
> > +define i32 @get_ytrack_to_xtracks() nounwind ssp {
> > +entry:
> > +  br label %for.body
> > +
> > +for.body:
> > +  %x0 = load i32* undef, align 4
> > +  br i1 undef, label %if.then.i146, label %is_sbox.exit155
> > +
> > +if.then.i146:
> > +  %add8.i143 = add nsw i32 0, %x0
> > +  %rem.i144 = srem i32 %add8.i143, %x0
> > +  %add9.i145 = add i32 %rem.i144, 1
> > +  br label %is_sbox.exit155
> > +
> > +is_sbox.exit155:                                  ; preds =
> %if.then.i146, %for.body
> > +  %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef,
> %for.body ]
> > +  %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64
> > +  %arrayidx18.i154 = getelementptr inbounds i32* null, i64
> %idxprom15.i152
> > +  %x1 = load i32* %arrayidx18.i154, align 4
> > +  br i1 undef, label %for.body51, label %for.body
> > +
> > +for.body51:                                       ; preds =
> %is_sbox.exit155
> > +  call fastcc void @get_switch_type(i32 %x1, i32 undef, i16 signext
> undef, i16 signext undef, i16* undef)
> > +  unreachable
> > +}
> > +declare fastcc void @get_switch_type(i32, i32, i16 signext, i16
> signext, i16* nocapture) nounwind ssp
> >
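
All of the fold_*, cb* and tb* functions above share one shape: entry either falls
through or branches over an empty block, and the phi in "done" picks between the two
incoming values. With -stress-early-ifcvt that hammock becomes a select, which the
target then folds into csinc/csinv/csneg/csel as the CHECK lines spell out. A
stripped-down instance of the pattern, as a sketch:

  define i32 @hammock(i32 %x, i32 %y, i32 %c) {
  entry:
    %cmp = icmp eq i32 %c, 0
    br i1 %cmp, label %eq_bb, label %done

  eq_bb:
    br label %done

  done:
    %res = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
    ret i32 %res
  }

Also worth noting: the 9223372036854775808 literal in the tbnz/tbz tests is 2^63,
i.e. exactly the #0x8000000000000000 sign-bit mask the ands/tst checks expect.
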
> > Added: llvm/trunk/test/CodeGen/ARM64/elf-calls.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/elf-calls.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/elf-calls.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/elf-calls.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,20 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s |
> llvm-objdump -triple=arm64-linux-gnu - -r | FileCheck %s
> --check-prefix=CHECK-OBJ
> > +
> > +declare void @callee()
> > +
> > +define void @caller() {
> > +  call void @callee()
> > +  ret void
> > +; CHECK-LABEL: caller:
> > +; CHECK:     bl callee
> > +; CHECK-OBJ: R_AARCH64_CALL26 callee
> > +}
> > +
> > +define void @tail_caller() {
> > +  tail call void @callee()
> > +  ret void
> > +; CHECK-LABEL: tail_caller:
> > +; CHECK:     b callee
> > +; CHECK-OBJ: R_AARCH64_JUMP26 callee
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/elf-constpool.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/elf-constpool.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/elf-constpool.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/elf-constpool.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,13 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu -O0 -o - %s | FileCheck %s
> > +
> > +; O0 checked for fastisel purposes. It has a separate path which
> > +; creates a constant-pool entry for floating-point values.
> > +
> > +define double @needs_const() {
> > +  ret double 3.14159
> > +; CHECK: .LCPI0_0:
> > +
> > +; CHECK: adrp {{x[0-9]+}}, .LCPI0_0
> > +; CHECK: ldr d0, [{{x[0-9]+}}, :lo12:.LCPI0_0]
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/elf-globals.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/elf-globals.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/elf-globals.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/elf-globals.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,115 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 | FileCheck %s
> --check-prefix=CHECK-FAST
> > +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s |
> FileCheck %s --check-prefix=CHECK-PIC
> > +; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s |
> FileCheck %s --check-prefix=CHECK-FAST-PIC
> > +
> > + at var8 = external global i8, align 1
> > + at var16 = external global i16, align 2
> > + at var32 = external global i32, align 4
> > + at var64 = external global i64, align 8
> > +
> > +define i8 @test_i8(i8 %new) {
> > +  %val = load i8* @var8, align 1
> > +  store i8 %new, i8* @var8
> > +  ret i8 %val
> > +; CHECK-LABEL: test_i8:
> > +; CHECK: adrp x[[HIREG:[0-9]+]], var8
> > +; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
> > +; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
> > +
> > +; CHECK-PIC-LABEL: test_i8:
> > +; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
> > +; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
> > +; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]]
> > +
> > +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8
> > +; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
> > +
> > +; CHECK-FAST-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
> > +; CHECK-FAST-PIC: ldr x[[VARADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
> > +; CHECK-FAST-PIC: ldr {{w[0-9]+}}, [x[[VARADDR]]]
> > +}
> > +
> > +define i16 @test_i16(i16 %new) {
> > +  %val = load i16* @var16, align 2
> > +  store i16 %new, i16* @var16
> > +  ret i16 %val
> > +; CHECK-LABEL: test_i16:
> > +; CHECK: adrp x[[HIREG:[0-9]+]], var16
> > +; CHECK: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
> > +; CHECK: strh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
> > +
> > +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var16
> > +; CHECK-FAST: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
> > +}
> > +
> > +define i32 @test_i32(i32 %new) {
> > +  %val = load i32* @var32, align 4
> > +  store i32 %new, i32* @var32
> > +  ret i32 %val
> > +; CHECK-LABEL: test_i32:
> > +; CHECK: adrp x[[HIREG:[0-9]+]], var32
> > +; CHECK: ldr {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
> > +; CHECK: str {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
> > +
> > +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var32
> > +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var32
> > +}
> > +
> > +define i64 @test_i64(i64 %new) {
> > +  %val = load i64* @var64, align 8
> > +  store i64 %new, i64* @var64
> > +  ret i64 %val
> > +; CHECK-LABEL: test_i64:
> > +; CHECK: adrp x[[HIREG:[0-9]+]], var64
> > +; CHECK: ldr {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
> > +; CHECK: str {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
> > +
> > +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var64
> > +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var64
> > +}
> > +
> > +define i64* @test_addr() {
> > +  ret i64* @var64
> > +; CHECK-LABEL: test_addr:
> > +; CHECK: adrp [[HIREG:x[0-9]+]], var64
> > +; CHECK: add x0, [[HIREG]], :lo12:var64
> > +
> > +; CHECK-FAST: adrp [[HIREG:x[0-9]+]], var64
> > +; CHECK-FAST: add x0, [[HIREG]], :lo12:var64
> > +}
> > +
> > + at hiddenvar = hidden global i32 0, align 4
> > + at protectedvar = protected global i32 0, align 4
> > +
> > +define i32 @test_vis() {
> > +  %lhs = load i32* @hiddenvar, align 4
> > +  %rhs = load i32* @protectedvar, align 4
> > +  %ret = add i32 %lhs, %rhs
> > +  ret i32 %ret
> > +; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar
> > +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:hiddenvar]
> > +; CHECK-PIC: adrp {{x[0-9]+}}, protectedvar
> > +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:protectedvar]
> > +}
> > +
> > + at var_default = external global [2 x i32]
> > +
> > +define i32 @test_default_align() {
> > +  %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0
> > +  %val = load i32* %addr
> > +  ret i32 %val
> > +; CHECK-LABEL: test_default_align:
> > +; CHECK: adrp x[[HIREG:[0-9]+]], var_default
> > +; CHECK: ldr w0, [x[[HIREG]], :lo12:var_default]
> > +}
> > +
> > +define i64 @test_default_unaligned() {
> > +  %addr = bitcast [2 x i32]* @var_default to i64*
> > +  %val = load i64* %addr
> > +  ret i64 %val
> > +; CHECK-LABEL: test_default_unaligned:
> > +; CHECK: adrp [[HIREG:x[0-9]+]], var_default
> > +; CHECK: add x[[ADDR:[0-9]+]], [[HIREG]], :lo12:var_default
> > +; CHECK: ldr x0, [x[[ADDR]]]
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/ext.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ext.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ext.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ext.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,101 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: test_vextd:
> > +;CHECK: {{ext.8b.*#3}}
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: test_vextRd:
> > +;CHECK: {{ext.8b.*#5}}
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: test_vextq:
> > +;CHECK: {{ext.16b.*3}}
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = load <16 x i8>* %B
> > +       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32
> 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: test_vextRq:
> > +;CHECK: {{ext.16b.*7}}
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = load <16 x i8>* %B
> > +       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32
> 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: test_vextd16:
> > +;CHECK: {{ext.8b.*#6}}
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = load <4 x i16>* %B
> > +       %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x
> i32> <i32 3, i32 4, i32 5, i32 6>
> > +       ret <4 x i16> %tmp3
> > +}
> > +
> > +define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: test_vextq32:
> > +;CHECK: {{ext.16b.*12}}
> > +       %tmp1 = load <4 x i32>* %A
> > +       %tmp2 = load <4 x i32>* %B
> > +       %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x
> i32> <i32 3, i32 4, i32 5, i32 6>
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +; Undef shuffle indices should not prevent matching to VEXT:
> > +
> > +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: test_vextd_undef:
> > +;CHECK: {{ext.8b.*}}
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B)
> nounwind {
> > +;CHECK-LABEL: test_vextRq_undef:
> > +;CHECK: {{ext.16b.*#7}}
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = load <16 x i8>* %B
> > +       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30,
> i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +; Tests for ReconstructShuffle function. Indices have to be carefully
> > +; chosen to reach the lowering phase as a BUILD_VECTOR.
> > +
> > +; One vector needs vext, the other can be handled by extract_subvector
> > +; Also checks interleaving of sources is handled correctly.
> > +; Essence: a vext is used on %A and something saner than stack
> load/store for final result.
> > +define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B)
> nounwind {
> > +;CHECK-LABEL: test_interleaved:
> > +;CHECK: ext.8b
> > +;CHECK: zip1.4h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x
> i32> <i32 3, i32 8, i32 5, i32 9>
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +; An undef in the shuffle list should still be optimizable
> > +define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: test_undef:
> > +;CHECK: zip1.4h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x
> i32> <i32 undef, i32 8, i32 5, i32 9>
> > +        ret <4 x i16> %tmp3
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/extend-int-to-fp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/extend-int-to-fp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/extend-int-to-fp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/extend-int-to-fp.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,19 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <4 x float> @foo(<4 x i16> %a) nounwind {
> > +; CHECK-LABEL: foo:
> > +; CHECK: ushll.4s      v0, v0, #0
> > +; CHECK-NEXT: ucvtf.4s v0, v0
> > +; CHECK-NEXT: ret
> > +  %vcvt.i = uitofp <4 x i16> %a to <4 x float>
> > +  ret <4 x float> %vcvt.i
> > +}
> > +
> > +define <4 x float> @bar(<4 x i16> %a) nounwind {
> > +; CHECK-LABEL: bar:
> > +; CHECK: sshll.4s      v0, v0, #0
> > +; CHECK-NEXT: scvtf.4s v0, v0
> > +; CHECK-NEXT: ret
> > +  %vcvt.i = sitofp <4 x i16> %a to <4 x float>
> > +  ret <4 x float> %vcvt.i
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/extend.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/extend.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/extend.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/extend.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,15 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
> > + at array = external global [0 x i32]
> > +
> > +define i64 @foo(i32 %i) {
> > +; CHECK: foo
> > +; CHECK:  adrp  x[[REG:[0-9]+]], _array at GOTPAGE
> > +; CHECK:  ldr x[[REG1:[0-9]+]], [x[[REG]], _array at GOTPAGEOFF]
> > +; CHECK:  ldrsw x0, [x[[REG1]], x0, sxtw #2]
> > +; CHECK:  ret
> > +  %idxprom = sext i32 %i to i64
> > +  %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64
> %idxprom
> > +  %tmp1 = load i32* %arrayidx, align 4
> > +  %conv = sext i32 %tmp1 to i64
> > +  ret i64 %conv
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/extload-knownzero.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/extload-knownzero.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/extload-knownzero.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/extload-knownzero.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,28 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +; rdar://12771555
> > +
> > +define void @foo(i16* %ptr, i32 %a) nounwind {
> > +entry:
> > +; CHECK-LABEL: foo:
> > +  %tmp1 = icmp ult i32 %a, 100
> > +  br i1 %tmp1, label %bb1, label %bb2
> > +bb1:
> > +; CHECK: %bb1
> > +; CHECK: ldrh [[REG:w[0-9]+]]
> > +  %tmp2 = load i16* %ptr, align 2
> > +  br label %bb2
> > +bb2:
> > +; CHECK: %bb2
> > +; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
> > +; CHECK: cmp [[REG]], #23
> > +  %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
> > +  %cmp = icmp ult i16 %tmp3, 24
> > +  br i1 %cmp, label %bb3, label %exit
> > +bb3:
> > +  call void @bar() nounwind
> > +  br label %exit
> > +exit:
> > +  ret void
> > +}
> > +
> > +declare void @bar ()
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/extract.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/extract.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/extract.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/extract.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,58 @@
> > +; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \
> > +; RUN: -march=arm64 | FileCheck %s
> > +
> > +define i64 @ror_i64(i64 %in) {
> > +; CHECK-LABEL: ror_i64:
> > +    %left = shl i64 %in, 19
> > +    %right = lshr i64 %in, 45
> > +    %val5 = or i64 %left, %right
> > +; CHECK: extr {{x[0-9]+}}, x0, x0, #45
> > +    ret i64 %val5
> > +}
> > +
> > +define i32 @ror_i32(i32 %in) {
> > +; CHECK-LABEL: ror_i32:
> > +    %left = shl i32 %in, 9
> > +    %right = lshr i32 %in, 23
> > +    %val5 = or i32 %left, %right
> > +; CHECK: extr {{w[0-9]+}}, w0, w0, #23
> > +    ret i32 %val5
> > +}
> > +
> > +define i32 @extr_i32(i32 %lhs, i32 %rhs) {
> > +; CHECK-LABEL: extr_i32:
> > +  %left = shl i32 %lhs, 6
> > +  %right = lshr i32 %rhs, 26
> > +  %val = or i32 %left, %right
> > +  ; Order of lhs and rhs matters here. Regalloc would have to be very
> odd to use
> > +  ; something other than w0 and w1.
> > +; CHECK: extr {{w[0-9]+}}, w0, w1, #26
> > +
> > +  ret i32 %val
> > +}
> > +
> > +define i64 @extr_i64(i64 %lhs, i64 %rhs) {
> > +; CHECK-LABEL: extr_i64:
> > +  %right = lshr i64 %rhs, 40
> > +  %left = shl i64 %lhs, 24
> > +  %val = or i64 %right, %left
> > +  ; Order of lhs and rhs matters here. Regalloc would have to be very
> odd to use
> > +  ; something other than x0 and x1.
> > +; CHECK: extr {{x[0-9]+}}, x0, x1, #40
> > +
> > +  ret i64 %val
> > +}
> > +
> > +; Regression test: a bad experimental pattern crept into git which
> optimised
> > +; this pattern to a single EXTR.
> > +define i32 @extr_regress(i32 %a, i32 %b) {
> > +; CHECK-LABEL: extr_regress:
> > +
> > +    %sh1 = shl i32 %a, 14
> > +    %sh2 = lshr i32 %b, 14
> > +    %val = or i32 %sh2, %sh1
> > +; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
> > +
> > +    ret i32 %val
> > +; CHECK: ret
> > +}
> >
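
The rotate cases come down to simple shift-amount arithmetic: in ror_i64 the shift
amounts satisfy 19 + 45 = 64 and in ror_i32 they satisfy 9 + 23 = 32, so each OR of
shifts really is a rotate and a single EXTR with the right-shift amount is correct.
In extr_regress the amounts give 14 + 14 = 28, which is not the bit width, so no
EXTR may be formed - exactly what the CHECK-NOT line guards against.
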
> > Added: llvm/trunk/test/CodeGen/ARM64/extract_subvector.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/extract_subvector.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/extract_subvector.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/extract_subvector.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,51 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8"
> insn.
> > +
> > +define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
> > +; CHECK: v8i8
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32>  <i32 8,
> i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +  ret <8 x i8> %ret
> > +}
> > +
> > +define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
> > +; CHECK-LABEL: v4i16:
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>  <i32 4,
> i32 5, i32 6, i32 7>
> > +  ret <4 x i16> %ret
> > +}
> > +
> > +define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
> > +; CHECK-LABEL: v2i32:
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>  <i32 2,
> i32 3>
> > +  ret <2 x i32> %ret
> > +}
> > +
> > +define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
> > +; CHECK-LABEL: v1i64:
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32>  <i32 1>
> > +  ret <1 x i64> %ret
> > +}
> > +
> > +define <2 x float> @v2f32(<4 x float> %a) nounwind {
> > +; CHECK-LABEL: v2f32:
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32>  <i32
> 2, i32 3>
> > +  ret <2 x float> %ret
> > +}
> > +
> > +define <1 x double> @v1f64(<2 x double> %a) nounwind {
> > +; CHECK-LABEL: v1f64:
> > +; CHECK: ext.16b v0, v0, v0, #8
> > +; CHECK: ret
> > +  %ret = shufflevector <2 x double> %a, <2 x double> %a, <1 x i32>
>  <i32 1>
> > +  ret <1 x double> %ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-addr-offset.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-addr-offset.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-addr-offset.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-addr-offset.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,47 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > + at sortlist = common global [5001 x i32] zeroinitializer, align 16
> > + at sortlist2 = common global [5001 x i64] zeroinitializer, align 16
> > +
> > +; Load an address with an offset larger than the LDR immediate can handle
> > +define i32 @foo() nounwind {
> > +entry:
> > +; CHECK: @foo
> > +; CHECK: adrp x[[REG:[0-9]+]], _sortlist at GOTPAGE
> > +; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist at GOTPAGEOFF]
> > +; CHECK: movz x[[REG2:[0-9]+]], #20000
> > +; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
> > +; CHECK: ldr w0, [x[[REG3]]]
> > +; CHECK: ret
> > +  %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32
> 0, i64 5000), align 4
> > +  ret i32 %0
> > +}
> > +
> > +define i64 @foo2() nounwind {
> > +entry:
> > +; CHECK: @foo2
> > +; CHECK: adrp x[[REG:[0-9]+]], _sortlist2 at GOTPAGE
> > +; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2 at GOTPAGEOFF]
> > +; CHECK: movz x[[REG2:[0-9]+]], #40000
> > +; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
> > +; CHECK: ldr x0, [x[[REG3]]]
> > +; CHECK: ret
> > +  %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32
> 0, i64 5000), align 4
> > +  ret i64 %0
> > +}
> > +
> > +; Load an address with a ridiculously large offset.
> > +; rdar://12505553
> > + at pd2 = common global i8* null, align 8
> > +
> > +define signext i8 @foo3() nounwind ssp {
> > +entry:
> > +; CHECK: @foo3
> > +; CHECK: movz x[[REG:[0-9]+]], #2874, lsl #32
> > +; CHECK: movk x[[REG]], #29646, lsl #16
> > +; CHECK: movk x[[REG]], #12274
> > +  %0 = load i8** @pd2, align 8
> > +  %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234
> > +  %1 = load i8* %arrayidx, align 1
> > +  ret i8 %1
> > +}
> >
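
The magic numbers in those CHECK lines are just the scaled offsets: 20000 is element
5000 of an i32 array (5000 * 4) and 40000 is element 5000 of an i64 array (5000 * 8),
both too large for a scaled LDR immediate, hence the movz + add. For foo3 the offset
decomposes as

  12345678901234 = (2874 << 32) + (29646 << 16) + 12274

which is exactly the movz/movk #2874 (lsl #32), #29646 (lsl #16), #12274 sequence
being checked.
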
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-alloca.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-alloca.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-alloca.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-alloca.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,24 @@
> > +; This test should cause the TargetMaterializeAlloca to be invoked
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +%struct.S1Ty = type { i64 }
> > +%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
> > +
> > +define void @takeS1(%struct.S1Ty* %V) nounwind {
> > +entry:
> > +  %V.addr = alloca %struct.S1Ty*, align 8
> > +  store %struct.S1Ty* %V, %struct.S1Ty** %V.addr, align 8
> > +  ret void
> > +}
> > +
> > +define void @main() nounwind {
> > +entry:
> > +; CHECK: main
> > +; CHECK: mov x[[REG:[0-9]+]], sp
> > +; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
> > +; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
> > +  %E = alloca %struct.S2Ty, align 4
> > +  %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
> > +  call void @takeS1(%struct.S1Ty* %B)
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-br.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-br.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-br.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-br.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,155 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define void @branch1() nounwind uwtable ssp {
> > +  %x = alloca i32, align 4
> > +  store i32 0, i32* %x, align 4
> > +  %1 = load i32* %x, align 4
> > +  %2 = icmp ne i32 %1, 0
> > +  br i1 %2, label %3, label %4
> > +
> > +; <label>:3                                       ; preds = %0
> > +  br label %4
> > +
> > +; <label>:4                                       ; preds = %3, %0
> > +  ret void
> > +}
> > +
> > +define void @branch2() nounwind uwtable ssp {
> > +  %1 = alloca i32, align 4
> > +  %x = alloca i32, align 4
> > +  %y = alloca i32, align 4
> > +  %z = alloca i32, align 4
> > +  store i32 0, i32* %1
> > +  store i32 1, i32* %y, align 4
> > +  store i32 1, i32* %x, align 4
> > +  store i32 0, i32* %z, align 4
> > +  %2 = load i32* %x, align 4
> > +  %3 = icmp ne i32 %2, 0
> > +  br i1 %3, label %4, label %5
> > +
> > +; <label>:4                                       ; preds = %0
> > +  store i32 0, i32* %1
> > +  br label %14
> > +
> > +; <label>:5                                       ; preds = %0
> > +  %6 = load i32* %y, align 4
> > +  %7 = icmp ne i32 %6, 0
> > +  br i1 %7, label %8, label %13
> > +
> > +; <label>:8                                       ; preds = %5
> > +  %9 = load i32* %z, align 4
> > +  %10 = icmp ne i32 %9, 0
> > +  br i1 %10, label %11, label %12
> > +
> > +; <label>:11                                      ; preds = %8
> > +  store i32 1, i32* %1
> > +  br label %14
> > +
> > +; <label>:12                                      ; preds = %8
> > +  store i32 0, i32* %1
> > +  br label %14
> > +
> > +; <label>:13                                      ; preds = %5
> > +  br label %14
> > +
> > +; <label>:14                                      ; preds = %4, %11,
> %12, %13
> > +  %15 = load i32* %1
> > +  ret void
> > +}
> > +
> > +define void @true_() nounwind uwtable ssp {
> > +; CHECK: @true_
> > +; CHECK: b LBB2_1
> > +  br i1 true, label %1, label %2
> > +
> > +; <label>:1
> > +; CHECK: LBB2_1
> > +  br label %2
> > +
> > +; <label>:2
> > +  ret void
> > +}
> > +
> > +define void @false_() nounwind uwtable ssp {
> > +; CHECK: @false_
> > +; CHECK: b LBB3_2
> > +  br i1 false, label %1, label %2
> > +
> > +; <label>:1
> > +  br label %2
> > +
> > +; <label>:2
> > +; CHECK: LBB3_2
> > +  ret void
> > +}
> > +
> > +define zeroext i8 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64
> %d) {
> > +entry:
> > +  %a.addr = alloca i8, align 1
> > +  %b.addr = alloca i16, align 2
> > +  %c.addr = alloca i32, align 4
> > +  %d.addr = alloca i64, align 8
> > +  store i8 %a, i8* %a.addr, align 1
> > +  store i16 %b, i16* %b.addr, align 2
> > +  store i32 %c, i32* %c.addr, align 4
> > +  store i64 %d, i64* %d.addr, align 8
> > +  %0 = load i16* %b.addr, align 2
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: b.eq LBB4_2
> > +  %conv = trunc i16 %0 to i1
> > +  br i1 %conv, label %if.then, label %if.end
> > +
> > +if.then:                                          ; preds = %entry
> > +  call void @foo1()
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %if.then,
> %entry
> > +  %1 = load i32* %c.addr, align 4
> > +; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
> > +; CHECK: subs w{{[0-9]+}}, w[[REG]], #0
> > +; CHECK: b.eq LBB4_4
> > +  %conv1 = trunc i32 %1 to i1
> > +  br i1 %conv1, label %if.then3, label %if.end4
> > +
> > +if.then3:                                         ; preds = %if.end
> > +  call void @foo1()
> > +  br label %if.end4
> > +
> > +if.end4:                                          ; preds = %if.then3,
> %if.end
> > +  %2 = load i64* %d.addr, align 8
> > +; CHECK: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
> > +; CHECK: b.eq LBB4_6
> > +  %conv5 = trunc i64 %2 to i1
> > +  br i1 %conv5, label %if.then7, label %if.end8
> > +
> > +if.then7:                                         ; preds = %if.end4
> > +  call void @foo1()
> > +  br label %if.end8
> > +
> > +if.end8:                                          ; preds = %if.then7,
> %if.end4
> > +  %3 = load i8* %a.addr, align 1
> > +  ret i8 %3
> > +}
> > +
> > +declare void @foo1()
> > +
> > +; rdar://15174028
> > +define i32 @trunc64(i64 %foo) nounwind {
> > +; CHECK: trunc64
> > +; CHECK: orr  [[REG:x[0-9]+]], xzr, #0x1
> > +; CHECK: and  [[REG2:x[0-9]+]], x0, [[REG]]
> > +; CHECK: mov  x[[REG3:[0-9]+]], [[REG2]]
> > +; CHECK: and  [[REG4:w[0-9]+]], w[[REG3]], #0x1
> > +; CHECK: subs {{w[0-9]+}}, [[REG4]], #0
> > +; CHECK: b.eq LBB5_2
> > +  %a = and i64 %foo, 1
> > +  %b = trunc i64 %a to i1
> > +  br i1 %b, label %if.then, label %if.else
> > +
> > +if.then:
> > +  ret i32 1
> > +
> > +if.else:
> > +  ret i32 0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-call.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-call.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-call.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-call.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,91 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define void @call0() nounwind {
> > +entry:
> > +  ret void
> > +}
> > +
> > +define void @foo0() nounwind {
> > +entry:
> > +; CHECK: foo0
> > +; CHECK: bl _call0
> > +  call void @call0()
> > +  ret void
> > +}
> > +
> > +define i32 @call1(i32 %a) nounwind {
> > +entry:
> > +  %a.addr = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr, align 4
> > +  %tmp = load i32* %a.addr, align 4
> > +  ret i32 %tmp
> > +}
> > +
> > +define i32 @foo1(i32 %a) nounwind {
> > +entry:
> > +; CHECK: foo1
> > +; CHECK: stur w0, [fp, #-4]
> > +; CHECK-NEXT: ldur w0, [fp, #-4]
> > +; CHECK-NEXT: bl _call1
> > +  %a.addr = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr, align 4
> > +  %tmp = load i32* %a.addr, align 4
> > +  %call = call i32 @call1(i32 %tmp)
> > +  ret i32 %call
> > +}
> > +
> > +define i32 @sext_(i8 %a, i16 %b) nounwind {
> > +entry:
> > +; CHECK: @sext_
> > +; CHECK: sxtb w0, w0
> > +; CHECK: sxth w1, w1
> > +; CHECK: bl _foo_sext_
> > +  call void @foo_sext_(i8 signext %a, i16 signext %b)
> > +  ret i32 0
> > +}
> > +
> > +declare void @foo_sext_(i8 %a, i16 %b)
> > +
> > +define i32 @zext_(i8 %a, i16 %b) nounwind {
> > +entry:
> > +; CHECK: @zext_
> > +; CHECK: uxtb w0, w0
> > +; CHECK: uxth w1, w1
> > +  call void @foo_zext_(i8 zeroext %a, i16 zeroext %b)
> > +  ret i32 0
> > +}
> > +
> > +declare void @foo_zext_(i8 %a, i16 %b)
> > +
> > +define i32 @t1(i32 %argc, i8** nocapture %argv) {
> > +entry:
> > +; CHECK: @t1
> > +; The last parameter will be passed on stack via i8.
> > +; CHECK: strb w{{[0-9]+}}, [sp]
> > +; CHECK-NEXT: bl _bar
> > +  %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8
> zeroext 28, i8 zeroext 40, i8 zeroext -70, i8 zeroext 28, i8 zeroext 39, i8
> zeroext -41)
> > +  ret i32 0
> > +}
> > +
> > +declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8
> zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
> > +
> > +; Test materialization of integers.  Target-independent selector
> handles this.
> > +define i32 @t2() {
> > +entry:
> > +; CHECK: @t2
> > +; CHECK: movz x0, #0
> > +; CHECK: orr w1, wzr, #0xfffffff8
> > +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x3ff
> > +; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
> > +; CHECK: movz w[[REG3:[0-9]+]], #0
> > +; CHECK: orr w[[REG4:[0-9]+]], wzr, #0x1
> > +; CHECK: uxth w2, w[[REG]]
> > +; CHECK: sxtb w3, w[[REG2]]
> > +; CHECK: and w4, w[[REG3]], #0x1
> > +; CHECK: and w5, w[[REG4]], #0x1
> > +; CHECK: bl    _func2
> > +  %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext
> 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1)
> > +  ret i32 0
> > +}
> > +
> > +declare i32 @func2(i64 zeroext, i32 signext, i16 zeroext, i8 signext,
> i1 zeroext, i1 zeroext)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-conversion.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-conversion.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-conversion.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-conversion.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,416 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +;; Test various conversions.
> > +define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64
> %d) nounwind ssp {
> > +entry:
> > +; CHECK: trunc_
> > +; CHECK: sub sp, sp, #16
> > +; CHECK: strb w0, [sp, #15]
> > +; CHECK: strh w1, [sp, #12]
> > +; CHECK: str w2, [sp, #8]
> > +; CHECK: str x3, [sp]
> > +; CHECK: ldr x3, [sp]
> > +; CHECK: mov x0, x3
> > +; CHECK: str w0, [sp, #8]
> > +; CHECK: ldr w0, [sp, #8]
> > +; CHECK: strh w0, [sp, #12]
> > +; CHECK: ldrh w0, [sp, #12]
> > +; CHECK: strb w0, [sp, #15]
> > +; CHECK: ldrb w0, [sp, #15]
> > +; CHECK: uxtb w0, w0
> > +; CHECK: add sp, sp, #16
> > +; CHECK: ret
> > +  %a.addr = alloca i8, align 1
> > +  %b.addr = alloca i16, align 2
> > +  %c.addr = alloca i32, align 4
> > +  %d.addr = alloca i64, align 8
> > +  store i8 %a, i8* %a.addr, align 1
> > +  store i16 %b, i16* %b.addr, align 2
> > +  store i32 %c, i32* %c.addr, align 4
> > +  store i64 %d, i64* %d.addr, align 8
> > +  %tmp = load i64* %d.addr, align 8
> > +  %conv = trunc i64 %tmp to i32
> > +  store i32 %conv, i32* %c.addr, align 4
> > +  %tmp1 = load i32* %c.addr, align 4
> > +  %conv2 = trunc i32 %tmp1 to i16
> > +  store i16 %conv2, i16* %b.addr, align 2
> > +  %tmp3 = load i16* %b.addr, align 2
> > +  %conv4 = trunc i16 %tmp3 to i8
> > +  store i8 %conv4, i8* %a.addr, align 1
> > +  %tmp5 = load i8* %a.addr, align 1
> > +  %conv6 = zext i8 %tmp5 to i32
> > +  ret i32 %conv6
> > +}
> > +
> > +define i64 @zext_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d)
> nounwind ssp {
> > +entry:
> > +; CHECK: zext_
> > +; CHECK: sub sp, sp, #16
> > +; CHECK: strb w0, [sp, #15]
> > +; CHECK: strh w1, [sp, #12]
> > +; CHECK: str w2, [sp, #8]
> > +; CHECK: str x3, [sp]
> > +; CHECK: ldrb w0, [sp, #15]
> > +; CHECK: uxtb w0, w0
> > +; CHECK: strh w0, [sp, #12]
> > +; CHECK: ldrh w0, [sp, #12]
> > +; CHECK: uxth w0, w0
> > +; CHECK: str w0, [sp, #8]
> > +; CHECK: ldr w0, [sp, #8]
> > +; CHECK: uxtw x3, w0
> > +; CHECK: str x3, [sp]
> > +; CHECK: ldr x0, [sp], #16
> > +; CHECK: ret
> > +  %a.addr = alloca i8, align 1
> > +  %b.addr = alloca i16, align 2
> > +  %c.addr = alloca i32, align 4
> > +  %d.addr = alloca i64, align 8
> > +  store i8 %a, i8* %a.addr, align 1
> > +  store i16 %b, i16* %b.addr, align 2
> > +  store i32 %c, i32* %c.addr, align 4
> > +  store i64 %d, i64* %d.addr, align 8
> > +  %tmp = load i8* %a.addr, align 1
> > +  %conv = zext i8 %tmp to i16
> > +  store i16 %conv, i16* %b.addr, align 2
> > +  %tmp1 = load i16* %b.addr, align 2
> > +  %conv2 = zext i16 %tmp1 to i32
> > +  store i32 %conv2, i32* %c.addr, align 4
> > +  %tmp3 = load i32* %c.addr, align 4
> > +  %conv4 = zext i32 %tmp3 to i64
> > +  store i64 %conv4, i64* %d.addr, align 8
> > +  %tmp5 = load i64* %d.addr, align 8
> > +  ret i64 %tmp5
> > +}
> > +
> > +define i32 @zext_i1_i32(i1 zeroext %a) nounwind ssp {
> > +entry:
> > +; CHECK: @zext_i1_i32
> > +; CHECK: and w0, w0, #0x1
> > +  %conv = zext i1 %a to i32
> > +  ret i32 %conv;
> > +}
> > +
> > +define i64 @zext_i1_i64(i1 zeroext %a) nounwind ssp {
> > +entry:
> > +; CHECK: @zext_i1_i64
> > +; CHECK: and w0, w0, #0x1
> > +  %conv = zext i1 %a to i64
> > +  ret i64 %conv;
> > +}
> > +
> > +define i64 @sext_(i8 signext %a, i16 signext %b, i32 %c, i64 %d)
> nounwind ssp {
> > +entry:
> > +; CHECK: sext_
> > +; CHECK: sub sp, sp, #16
> > +; CHECK: strb w0, [sp, #15]
> > +; CHECK: strh w1, [sp, #12]
> > +; CHECK: str w2, [sp, #8]
> > +; CHECK: str x3, [sp]
> > +; CHECK: ldrb w0, [sp, #15]
> > +; CHECK: sxtb w0, w0
> > +; CHECK: strh w0, [sp, #12]
> > +; CHECK: ldrh w0, [sp, #12]
> > +; CHECK: sxth w0, w0
> > +; CHECK: str w0, [sp, #8]
> > +; CHECK: ldr w0, [sp, #8]
> > +; CHECK: sxtw x3, w0
> > +; CHECK: str x3, [sp]
> > +; CHECK: ldr x0, [sp], #16
> > +; CHECK: ret
> > +  %a.addr = alloca i8, align 1
> > +  %b.addr = alloca i16, align 2
> > +  %c.addr = alloca i32, align 4
> > +  %d.addr = alloca i64, align 8
> > +  store i8 %a, i8* %a.addr, align 1
> > +  store i16 %b, i16* %b.addr, align 2
> > +  store i32 %c, i32* %c.addr, align 4
> > +  store i64 %d, i64* %d.addr, align 8
> > +  %tmp = load i8* %a.addr, align 1
> > +  %conv = sext i8 %tmp to i16
> > +  store i16 %conv, i16* %b.addr, align 2
> > +  %tmp1 = load i16* %b.addr, align 2
> > +  %conv2 = sext i16 %tmp1 to i32
> > +  store i32 %conv2, i32* %c.addr, align 4
> > +  %tmp3 = load i32* %c.addr, align 4
> > +  %conv4 = sext i32 %tmp3 to i64
> > +  store i64 %conv4, i64* %d.addr, align 8
> > +  %tmp5 = load i64* %d.addr, align 8
> > +  ret i64 %tmp5
> > +}
> > +
> > +; Test sext i8 to i64
> > +define i64 @sext_2(i8 signext %a) nounwind ssp {
> > +entry:
> > +; CHECK: sext_2
> > +; CHECK: sxtb x0, w0
> > +  %conv = sext i8 %a to i64
> > +  ret i64 %conv
> > +}
> > +
> > +; Test sext i1 to i32
> > +define i32 @sext_i1_i32(i1 signext %a) nounwind ssp {
> > +entry:
> > +; CHECK: sext_i1_i32
> > +; CHECK: sbfm w0, w0, #0, #0
> > +  %conv = sext i1 %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; Test sext i1 to i16
> > +define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sext_i1_i16
> > +; CHECK: sbfm w0, w0, #0, #0
> > +  %conv = sext i1 %a to i16
> > +  ret i16 %conv
> > +}
> > +
> > +; Test sext i1 to i8
> > +define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sext_i1_i8
> > +; CHECK: sbfm w0, w0, #0, #0
> > +  %conv = sext i1 %a to i8
> > +  ret i8 %conv
> > +}
> > +
> > +; Test fpext
> > +define double @fpext_(float %a) nounwind ssp {
> > +entry:
> > +; CHECK: fpext_
> > +; CHECK: fcvt d0, s0
> > +  %conv = fpext float %a to double
> > +  ret double %conv
> > +}
> > +
> > +; Test fptrunc
> > +define float @fptrunc_(double %a) nounwind ssp {
> > +entry:
> > +; CHECK: fptrunc_
> > +; CHECK: fcvt s0, d0
> > +  %conv = fptrunc double %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test fptosi
> > +define i32 @fptosi_ws(float %a) nounwind ssp {
> > +entry:
> > +; CHECK: fptosi_ws
> > +; CHECK: fcvtzs w0, s0
> > +  %conv = fptosi float %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; Test fptosi
> > +define i32 @fptosi_wd(double %a) nounwind ssp {
> > +entry:
> > +; CHECK: fptosi_wd
> > +; CHECK: fcvtzs w0, d0
> > +  %conv = fptosi double %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; Test fptoui
> > +define i32 @fptoui_ws(float %a) nounwind ssp {
> > +entry:
> > +; CHECK: fptoui_ws
> > +; CHECK: fcvtzu w0, s0
> > +  %conv = fptoui float %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; Test fptoui
> > +define i32 @fptoui_wd(double %a) nounwind ssp {
> > +entry:
> > +; CHECK: fptoui_wd
> > +; CHECK: fcvtzu w0, d0
> > +  %conv = fptoui double %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +; Test sitofp
> > +define float @sitofp_sw_i1(i1 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_sw_i1
> > +; CHECK: sbfm w0, w0, #0, #0
> > +; CHECK: scvtf s0, w0
> > +  %conv = sitofp i1 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test sitofp
> > +define float @sitofp_sw_i8(i8 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_sw_i8
> > +; CHECK: sxtb w0, w0
> > +; CHECK: scvtf s0, w0
> > +  %conv = sitofp i8 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test sitofp
> > +define float @sitofp_sw_i16(i16 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_sw_i16
> > +; CHECK: sxth w0, w0
> > +; CHECK: scvtf s0, w0
> > +  %conv = sitofp i16 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test sitofp
> > +define float @sitofp_sw(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_sw
> > +; CHECK: scvtf s0, w0
> > +  %conv = sitofp i32 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test sitofp
> > +define float @sitofp_sx(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_sx
> > +; CHECK: scvtf s0, x0
> > +  %conv = sitofp i64 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test sitofp
> > +define double @sitofp_dw(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_dw
> > +; CHECK: scvtf d0, w0
> > +  %conv = sitofp i32 %a to double
> > +  ret double %conv
> > +}
> > +
> > +; Test sitofp
> > +define double @sitofp_dx(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: sitofp_dx
> > +; CHECK: scvtf d0, x0
> > +  %conv = sitofp i64 %a to double
> > +  ret double %conv
> > +}
> > +
> > +; Test uitofp
> > +define float @uitofp_sw_i1(i1 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_sw_i1
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: ucvtf s0, w0
> > +  %conv = uitofp i1 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test uitofp
> > +define float @uitofp_sw_i8(i8 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_sw_i8
> > +; CHECK: uxtb w0, w0
> > +; CHECK: ucvtf s0, w0
> > +  %conv = uitofp i8 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test uitofp
> > +define float @uitofp_sw_i16(i16 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_sw_i16
> > +; CHECK: uxth w0, w0
> > +; CHECK: ucvtf s0, w0
> > +  %conv = uitofp i16 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test uitofp
> > +define float @uitofp_sw(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_sw
> > +; CHECK: ucvtf s0, w0
> > +  %conv = uitofp i32 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test uitofp
> > +define float @uitofp_sx(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_sx
> > +; CHECK: ucvtf s0, x0
> > +  %conv = uitofp i64 %a to float
> > +  ret float %conv
> > +}
> > +
> > +; Test uitofp
> > +define double @uitofp_dw(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_dw
> > +; CHECK: ucvtf d0, w0
> > +  %conv = uitofp i32 %a to double
> > +  ret double %conv
> > +}
> > +
> > +; Test uitofp
> > +define double @uitofp_dx(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: uitofp_dx
> > +; CHECK: ucvtf d0, x0
> > +  %conv = uitofp i64 %a to double
> > +  ret double %conv
> > +}
> > +
> > +define i32 @i64_trunc_i32(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: i64_trunc_i32
> > +; CHECK: mov x1, x0
> > +  %conv = trunc i64 %a to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: i64_trunc_i16
> > +; CHECK: mov x[[REG:[0-9]+]], x0
> > +; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xffff
> > +; CHECK: uxth w0, [[REG2]]
> > +  %conv = trunc i64 %a to i16
> > +  ret i16 %conv
> > +}
> > +
> > +define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: i64_trunc_i8
> > +; CHECK: mov x[[REG:[0-9]+]], x0
> > +; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xff
> > +; CHECK: uxtb w0, [[REG2]]
> > +  %conv = trunc i64 %a to i8
> > +  ret i8 %conv
> > +}
> > +
> > +define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: i64_trunc_i1
> > +; CHECK: mov x[[REG:[0-9]+]], x0
> > +; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0x1
> > +; CHECK: and w0, [[REG2]], #0x1
> > +  %conv = trunc i64 %a to i1
> > +  ret i1 %conv
> > +}
> > +
> > +; rdar://15101939
> > +define void @stack_trunc() nounwind {
> > +; CHECK: stack_trunc
> > +; CHECK: sub  sp, sp, #16
> > +; CHECK: ldr  [[REG:x[0-9]+]], [sp]
> > +; CHECK: mov  x[[REG2:[0-9]+]], [[REG]]
> > +; CHECK: and  [[REG3:w[0-9]+]], w[[REG2]], #0xff
> > +; CHECK: strb [[REG3]], [sp, #15]
> > +; CHECK: add  sp, sp, #16
> > +  %a = alloca i8, align 1
> > +  %b = alloca i64, align 8
> > +  %c = load i64* %b, align 8
> > +  %d = trunc i64 %c to i8
> > +  store i8 %d, i8* %a, align 1
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-fcmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-fcmp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-fcmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-fcmp.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,146 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define zeroext i1 @fcmp_float1(float %a) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_float1
> > +; CHECK: fcmp s0, #0.0
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, 0.000000e+00
> > +  ret i1 %cmp
> > +}
> > +
> > +define zeroext i1 @fcmp_float2(float %a, float %b) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_float2
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +define zeroext i1 @fcmp_double1(double %a) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_double1
> > +; CHECK: fcmp d0, #0.0
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une double %a, 0.000000e+00
> > +  ret i1 %cmp
> > +}
> > +
> > +define zeroext i1 @fcmp_double2(double %a, double %b) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_double2
> > +; CHECK: fcmp d0, d1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une double %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +; Check each fcmp condition
> > +define float @fcmp_oeq(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_oeq
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ne
> > +  %cmp = fcmp oeq float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ogt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ogt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, le
> > +  %cmp = fcmp ogt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_oge(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_oge
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, lt
> > +  %cmp = fcmp oge float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_olt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_olt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, pl
> > +  %cmp = fcmp olt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ole(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ole
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, hi
> > +  %cmp = fcmp ole float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ord(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ord
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, vs
> > +  %cmp = fcmp ord float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_uno(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_uno
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, vc
> > +  %cmp = fcmp uno float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ugt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ugt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ls
> > +  %cmp = fcmp ugt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_uge(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_uge
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, mi
> > +  %cmp = fcmp uge float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ult(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ult
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ge
> > +  %cmp = fcmp ult float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ule(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ule
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, gt
> > +  %cmp = fcmp ule float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_une(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_une
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-gv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-gv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-gv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-gv.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,38 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +; Test load/store of global value from global offset table.
> > +@seed = common global i64 0, align 8
> > +
> > +define void @Initrand() nounwind {
> > +entry:
> > +; CHECK: @Initrand
> > +; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
> > +; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
> > +; CHECK: str x{{[0-9]+}}, [x[[REG2]]]
> > +  store i64 74755, i64* @seed, align 8
> > +  ret void
> > +}
> > +
> > +define i32 @Rand() nounwind {
> > +entry:
> > +; CHECK: @Rand
> > +; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
> > +; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
> > +; CHECK: movz x[[REG3:[0-9]+]], #1309
> > +; CHECK: ldr x[[REG4:[0-9]+]], [x[[REG2]]]
> > +; CHECK: mul x[[REG5:[0-9]+]], x[[REG4]], x[[REG3]]
> > +; CHECK: movz x[[REG6:[0-9]+]], #13849
> > +; CHECK: add x[[REG7:[0-9]+]], x[[REG5]], x[[REG6]]
> > +; CHECK: orr x[[REG8:[0-9]+]], xzr, #0xffff
> > +; CHECK: and x[[REG9:[0-9]+]], x[[REG7]], x[[REG8]]
> > +; CHECK: str x[[REG9]], [x[[REG]]]
> > +; CHECK: ldr x{{[0-9]+}}, [x[[REG]]]
> > +  %0 = load i64* @seed, align 8
> > +  %mul = mul nsw i64 %0, 1309
> > +  %add = add nsw i64 %mul, 13849
> > +  %and = and i64 %add, 65535
> > +  store i64 %and, i64* @seed, align 8
> > +  %1 = load i64* @seed, align 8
> > +  %conv = trunc i64 %1 to i32
> > +  ret i32 %conv
> > +}
> >
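A side note on the GOT indirection the CHECK lines in @Initrand and @Rand encode: reaching @seed through the global offset table costs one extra load (adrp of the GOT page, then an ldr of the slot) before the variable itself can be touched. A toy C++ model of that indirection, purely illustrative and not code from the backend:

  #include <cstdint>

  // One GOT slot per global; got[0] stands in for the slot that the
  // adrp/ldr _seed@GOTPAGEOFF pair in the test resolves.
  uint64_t seed_storage = 0;
  uint64_t *const got[] = { &seed_storage };

  void Initrand_model() {
    uint64_t *seed = got[0];   // extra load: fetch &seed from the GOT
    *seed = 74755;             // then the str the IR actually performs
  }
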
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-icmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-icmp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-icmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-icmp.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,214 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_eq_imm
> > +; CHECK: cmp  w0, #31
> > +; CHECK: csinc w0, wzr, wzr, ne
> > +  %cmp = icmp eq i32 %a, 31
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_eq_neg_imm(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_eq_neg_imm
> > +; CHECK: cmn  w0, #7
> > +; CHECK: csinc w0, wzr, wzr, ne
> > +  %cmp = icmp eq i32 %a, -7
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_eq(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_eq
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, ne
> > +  %cmp = icmp eq i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_ne(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_ne
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, eq
> > +  %cmp = icmp ne i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_ugt(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_ugt
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, ls
> > +  %cmp = icmp ugt i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_uge(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_uge
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, cc
> > +  %cmp = icmp uge i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_ult(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_ult
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, cs
> > +  %cmp = icmp ult i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_ule(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_ule
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, hi
> > +  %cmp = icmp ule i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_sgt(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_sgt
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, le
> > +  %cmp = icmp sgt i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_sge(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_sge
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, lt
> > +  %cmp = icmp sge i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_slt(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_slt
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, ge
> > +  %cmp = icmp slt i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_sle(i32 %a, i32 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_sle
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, gt
> > +  %cmp = icmp sle i32 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define i32 @icmp_i64(i64 %a, i64 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_i64
> > +; CHECK: cmp  x0, x1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, gt
> > +  %cmp = icmp sle i64 %a, %b
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> > +
> > +define zeroext i1 @icmp_eq_i16(i16 %a, i16 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_eq_i16
> > +; CHECK: sxth w0, w0
> > +; CHECK: sxth w1, w1
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, ne
> > +  %cmp = icmp eq i16 %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +define zeroext i1 @icmp_eq_i8(i8 %a, i8 %b) nounwind ssp {
> > +entry:
> > +; CHECK: icmp_eq_i8
> > +; CHECK: sxtb w0, w0
> > +; CHECK: sxtb w1, w1
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, ne
> > +  %cmp = icmp eq i8 %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
> > +entry:
> > +; CHECK: icmp_i16_unsigned
> > +; CHECK: uxth w0, w0
> > +; CHECK: uxth w1, w1
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, cs
> > +  %cmp = icmp ult i16 %a, %b
> > +  %conv2 = zext i1 %cmp to i32
> > +  ret i32 %conv2
> > +}
> > +
> > +define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
> > +entry:
> > +; CHECK: @icmp_i8_signed
> > +; CHECK: sxtb w0, w0
> > +; CHECK: sxtb w1, w1
> > +; CHECK: cmp  w0, w1
> > +; CHECK: csinc w0, wzr, wzr, le
> > +  %cmp = icmp sgt i8 %a, %b
> > +  %conv2 = zext i1 %cmp to i32
> > +  ret i32 %conv2
> > +}
> > +
> > +
> > +define i32 @icmp_i16_signed_const(i16 %a) nounwind {
> > +entry:
> > +; CHECK: icmp_i16_signed_const
> > +; CHECK: sxth w0, w0
> > +; CHECK: cmn  w0, #233
> > +; CHECK: csinc w0, wzr, wzr, ge
> > +; CHECK: and w0, w0, #0x1
> > +  %cmp = icmp slt i16 %a, -233
> > +  %conv2 = zext i1 %cmp to i32
> > +  ret i32 %conv2
> > +}
> > +
> > +define i32 @icmp_i8_signed_const(i8 %a) nounwind {
> > +entry:
> > +; CHECK: icmp_i8_signed_const
> > +; CHECK: sxtb w0, w0
> > +; CHECK: cmp  w0, #124
> > +; CHECK: csinc w0, wzr, wzr, le
> > +; CHECK: and w0, w0, #0x1
> > +  %cmp = icmp sgt i8 %a, 124
> > +  %conv2 = zext i1 %cmp to i32
> > +  ret i32 %conv2
> > +}
> > +
> > +define i32 @icmp_i1_unsigned_const(i1 %a) nounwind {
> > +entry:
> > +; CHECK: icmp_i1_unsigned_const
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: cmp  w0, #0
> > +; CHECK: csinc w0, wzr, wzr, cs
> > +; CHECK: and w0, w0, #0x1
> > +  %cmp = icmp ult i1 %a, 0
> > +  %conv2 = zext i1 %cmp to i32
> > +  ret i32 %conv2
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-indirectbr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-indirectbr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-indirectbr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-indirectbr.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,36 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +@fn.table = internal global [2 x i8*] [i8* blockaddress(@fn, %ZERO),
> i8* blockaddress(@fn, %ONE)], align 8
> > +
> > +define i32 @fn(i32 %target) nounwind {
> > +entry:
> > +; CHECK: @fn
> > +  %retval = alloca i32, align 4
> > +  %target.addr = alloca i32, align 4
> > +  store i32 %target, i32* %target.addr, align 4
> > +  %0 = load i32* %target.addr, align 4
> > +  %idxprom = zext i32 %0 to i64
> > +  %arrayidx = getelementptr inbounds [2 x i8*]* @fn.table, i32 0, i64
> %idxprom
> > +  %1 = load i8** %arrayidx, align 8
> > +  br label %indirectgoto
> > +
> > +ZERO:                                             ; preds =
> %indirectgoto
> > +; CHECK: LBB0_1
> > +  store i32 0, i32* %retval
> > +  br label %return
> > +
> > +ONE:                                              ; preds =
> %indirectgoto
> > +; CHECK: LBB0_2
> > +  store i32 1, i32* %retval
> > +  br label %return
> > +
> > +return:                                           ; preds = %ONE, %ZERO
> > +  %2 = load i32* %retval
> > +  ret i32 %2
> > +
> > +indirectgoto:                                     ; preds = %entry
> > +; CHECK: ldr x0, [sp]
> > +; CHECK: br x0
> > +  %indirect.goto.dest = phi i8* [ %1, %entry ]
> > +  indirectbr i8* %indirect.goto.dest, [label %ZERO, label %ONE]
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-intrinsic.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-intrinsic.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-intrinsic.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-intrinsic.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,135 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic
> -mtriple=arm64-apple-ios | FileCheck %s --check-prefix=ARM64
> > +
> > +@message = global [80 x i8] c"The LLVM Compiler
> Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00",
> align 16
> > +@temp = common global [80 x i8] zeroinitializer, align 16
> > +
> > +define void @t1() {
> > +; ARM64: t1
> > +; ARM64: adrp x8, _message@PAGE
> > +; ARM64: add x0, x8, _message@PAGEOFF
> > +; ARM64: movz w9, #0
> > +; ARM64: movz x2, #80
> > +; ARM64: uxtb w1, w9
> > +; ARM64: bl _memset
> > +  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x
> i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
> > +
> > +define void @t2() {
> > +; ARM64: t2
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x8, _message@PAGE
> > +; ARM64: add x1, x8, _message@PAGEOFF
> > +; ARM64: movz x2, #80
> > +; ARM64: bl _memcpy
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 80, i32 16, i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1)
> > +
> > +define void @t3() {
> > +; ARM64: t3
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x8, _message@PAGE
> > +; ARM64: add x1, x8, _message@PAGEOFF
> > +; ARM64: movz x2, #20
> > +; ARM64: bl _memmove
> > +  call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 20, i32 16, i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1)
> > +
> > +define void @t4() {
> > +; ARM64: t4
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x9, _message@PAGE
> > +; ARM64: add x9, x9, _message@PAGEOFF
> > +; ARM64: ldr x10, [x9]
> > +; ARM64: str x10, [x8]
> > +; ARM64: ldr x10, [x9, #8]
> > +; ARM64: str x10, [x8, #8]
> > +; ARM64: ldrb w11, [x9, #16]
> > +; ARM64: strb w11, [x8, #16]
> > +; ARM64: ret
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 17, i32 16, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t5() {
> > +; ARM64: t5
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x9, _message@PAGE
> > +; ARM64: add x9, x9, _message@PAGEOFF
> > +; ARM64: ldr x10, [x9]
> > +; ARM64: str x10, [x8]
> > +; ARM64: ldr x10, [x9, #8]
> > +; ARM64: str x10, [x8, #8]
> > +; ARM64: ldrb w11, [x9, #16]
> > +; ARM64: strb w11, [x8, #16]
> > +; ARM64: ret
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 17, i32 8, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t6() {
> > +; ARM64: t6
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x9, _message@PAGE
> > +; ARM64: add x9, x9, _message@PAGEOFF
> > +; ARM64: ldr w10, [x9]
> > +; ARM64: str w10, [x8]
> > +; ARM64: ldr w10, [x9, #4]
> > +; ARM64: str w10, [x8, #4]
> > +; ARM64: ldrb w10, [x9, #8]
> > +; ARM64: strb w10, [x8, #8]
> > +; ARM64: ret
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 9, i32 4, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t7() {
> > +; ARM64: t7
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x9, _message@PAGE
> > +; ARM64: add x9, x9, _message@PAGEOFF
> > +; ARM64: ldrh w10, [x9]
> > +; ARM64: strh w10, [x8]
> > +; ARM64: ldrh w10, [x9, #2]
> > +; ARM64: strh w10, [x8, #2]
> > +; ARM64: ldrh w10, [x9, #4]
> > +; ARM64: strh w10, [x8, #4]
> > +; ARM64: ldrb w10, [x9, #6]
> > +; ARM64: strb w10, [x8, #6]
> > +; ARM64: ret
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 7, i32 2, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t8() {
> > +; ARM64: t8
> > +; ARM64: adrp x8, _temp@GOTPAGE
> > +; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
> > +; ARM64: adrp x9, _message@PAGE
> > +; ARM64: add x9, x9, _message@PAGEOFF
> > +; ARM64: ldrb w10, [x9]
> > +; ARM64: strb w10, [x8]
> > +; ARM64: ldrb w10, [x9, #1]
> > +; ARM64: strb w10, [x8, #1]
> > +; ARM64: ldrb w10, [x9, #2]
> > +; ARM64: strb w10, [x8, #2]
> > +; ARM64: ldrb w10, [x9, #3]
> > +; ARM64: strb w10, [x8, #3]
> > +; ARM64: ret
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80
> x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]*
> @message, i32 0, i32 0), i64 4, i32 1, i1 false)
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-materialize.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-materialize.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-materialize.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-materialize.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,27 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +; Materialize using fmov
> > +define void @float_(float* %value) {
> > +; CHECK: @float_
> > +; CHECK: fmov s0, #1.250000e+00
> > +  store float 1.250000e+00, float* %value, align 4
> > +  ret void
> > +}
> > +
> > +define void @double_(double* %value) {
> > +; CHECK: @double_
> > +; CHECK: fmov d0, #1.250000e+00
> > +  store double 1.250000e+00, double* %value, align 8
> > +  ret void
> > +}
> > +
> > +; Materialize from constant pool
> > +define float @float_cp() {
> > +; CHECK: @float_cp
> > +  ret float 0x400921FB60000000
> > +}
> > +
> > +define double @double_cp() {
> > +; CHECK: @double_cp
> > +  ret double 0x400921FB54442D18
> > +}
> >
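Why the first two stores can use fmov while the two pi-valued returns cannot: the scalar FMOV-immediate form only encodes values of the shape +/-(1 + n/16) * 2^e with n in [0,15] and e in [-3,4] (my summary of the encoding, not wording from the patch), so 1.25 fits and pi does not. A small self-contained C++ check, illustrative only:

  #include <cmath>
  #include <cstdio>

  // Brute-force the 256 immediates the FMOV encoding can express.
  static bool isFMOVImm(double V) {
    for (int s = 0; s <= 1; ++s)
      for (int n = 0; n < 16; ++n)
        for (int e = -3; e <= 4; ++e)
          if (V == (s ? -1.0 : 1.0) * (1.0 + n / 16.0) * std::ldexp(1.0, e))
            return true;
    return false;
  }

  int main() {
    std::printf("%d\n", isFMOVImm(1.25));               // 1: fmov immediate
    std::printf("%d\n", isFMOVImm(3.14159265358979));   // 0: constant pool
  }
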
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-noconvert.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-noconvert.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-noconvert.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-noconvert.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,36 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -O0 %s -o - | FileCheck %s
> > +
> > +; Fast-isel can't do vector conversions yet, but it was emitting some
> highly
> > +; suspect UCVTFUWDri MachineInstrs.
> > +define <4 x float> @test_uitofp(<4 x i32> %in) {
> > +; CHECK-LABEL: test_uitofp:
> > +; CHECK: ucvtf.4s v0, v0
> > +
> > +  %res = uitofp <4 x i32> %in to <4 x float>
> > +  ret <4 x float> %res
> > +}
> > +
> > +define <2 x double> @test_sitofp(<2 x i32> %in) {
> > +; CHECK-LABEL: test_sitofp:
> > +; CHECK: sshll.2d [[EXT:v[0-9]+]], v0, #0
> > +; CHECK: scvtf.2d v0, [[EXT]]
> > +
> > +  %res = sitofp <2 x i32> %in to <2 x double>
> > +  ret <2 x double> %res
> > +}
> > +
> > +define <2 x i32> @test_fptoui(<2 x float> %in) {
> > +; CHECK-LABEL: test_fptoui:
> > +; CHECK: fcvtzu.2s v0, v0
> > +
> > +  %res = fptoui <2 x float> %in to <2 x i32>
> > +  ret <2 x i32> %res
> > +}
> > +
> > +define <2 x i64> @test_fptosi(<2 x double> %in) {
> > +; CHECK-LABEL: test_fptosi:
> > +; CHECK: fcvtzs.2d v0, v0
> > +
> > +  %res = fptosi <2 x double> %in to <2 x i64>
> > +  ret <2 x i64> %res
> > +}
> > \ No newline at end of file
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-rem.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-rem.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-rem.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-rem.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,33 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define i32 @t1(i32 %a, i32 %b) {
> > +; CHECK: @t1
> > +; CHECK: sdiv w2, w0, w1
> > +; CHECK: msub w2, w2, w1, w0
> > +  %1 = srem i32 %a, %b
> > +  ret i32 %1
> > +}
> > +
> > +define i64 @t2(i64 %a, i64 %b) {
> > +; CHECK: @t2
> > +; CHECK: sdiv x2, x0, x1
> > +; CHECK: msub x2, x2, x1, x0
> > +  %1 = srem i64 %a, %b
> > +  ret i64 %1
> > +}
> > +
> > +define i32 @t3(i32 %a, i32 %b) {
> > +; CHECK: @t3
> > +; CHECK: udiv w2, w0, w1
> > +; CHECK: msub w2, w2, w1, w0
> > +  %1 = urem i32 %a, %b
> > +  ret i32 %1
> > +}
> > +
> > +define i64 @t4(i64 %a, i64 %b) {
> > +; CHECK: @t4
> > +; CHECK: udiv x2, x0, x1
> > +; CHECK: msub x2, x2, x1, x0
> > +  %1 = urem i64 %a, %b
> > +  ret i64 %1
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-ret.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-ret.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-ret.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-ret.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,63 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +;; Test returns.
> > +define void @t0() nounwind ssp {
> > +entry:
> > +; CHECK: t0
> > +; CHECK: ret
> > +  ret void
> > +}
> > +
> > +define i32 @t1(i32 %a) nounwind ssp {
> > +entry:
> > +; CHECK: t1
> > +; CHECK: str w0, [sp, #12]
> > +; CHECK-NEXT: ldr w0, [sp, #12]
> > +; CHECK: ret
> > +  %a.addr = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr, align 4
> > +  %tmp = load i32* %a.addr, align 4
> > +  ret i32 %tmp
> > +}
> > +
> > +define i64 @t2(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK: t2
> > +; CHECK: str x0, [sp, #8]
> > +; CHECK-NEXT: ldr x0, [sp, #8]
> > +; CHECK: ret
> > +  %a.addr = alloca i64, align 8
> > +  store i64 %a, i64* %a.addr, align 8
> > +  %tmp = load i64* %a.addr, align 8
> > +  ret i64 %tmp
> > +}
> > +
> > +define signext i16 @ret_i16(i16 signext %a) nounwind {
> > +entry:
> > +; CHECK: @ret_i16
> > +; CHECK: sxth  w0, w0
> > +  %a.addr = alloca i16, align 1
> > +  store i16 %a, i16* %a.addr, align 1
> > +  %0 = load i16* %a.addr, align 1
> > +  ret i16 %0
> > +}
> > +
> > +define signext i8 @ret_i8(i8 signext %a) nounwind {
> > +entry:
> > +; CHECK: @ret_i8
> > +; CHECK: sxtb  w0, w0
> > +  %a.addr = alloca i8, align 1
> > +  store i8 %a, i8* %a.addr, align 1
> > +  %0 = load i8* %a.addr, align 1
> > +  ret i8 %0
> > +}
> > +
> > +define signext i1 @ret_i1(i1 signext %a) nounwind {
> > +entry:
> > +; CHECK: @ret_i1
> > +; CHECK: and w0, w0, #0x1
> > +  %a.addr = alloca i1, align 1
> > +  store i1 %a, i1* %a.addr, align 1
> > +  %0 = load i1* %a.addr, align 1
> > +  ret i1 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel-select.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel-select.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel-select.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel-select.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,63 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define i32 @t1(i32 %c) nounwind readnone {
> > +entry:
> > +; CHECK: @t1
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
> > +  %0 = icmp sgt i32 %c, 1
> > +  %1 = select i1 %0, i32 123, i32 357
> > +  ret i32 %1
> > +}
> > +
> > +define i64 @t2(i32 %c) nounwind readnone {
> > +entry:
> > +; CHECK: @t2
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
> > +  %0 = icmp sgt i32 %c, 1
> > +  %1 = select i1 %0, i64 123, i64 357
> > +  ret i64 %1
> > +}
> > +
> > +define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
> > +entry:
> > +; CHECK: @t3
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
> > +  %0 = select i1 %c, i32 %a, i32 %b
> > +  ret i32 %0
> > +}
> > +
> > +define i64 @t4(i1 %c, i64 %a, i64 %b) nounwind readnone {
> > +entry:
> > +; CHECK: @t4
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
> > +  %0 = select i1 %c, i64 %a, i64 %b
> > +  ret i64 %0
> > +}
> > +
> > +define float @t5(i1 %c, float %a, float %b) nounwind readnone {
> > +entry:
> > +; CHECK: @t5
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: fcsel s0, s0, s1, ne
> > +  %0 = select i1 %c, float %a, float %b
> > +  ret float %0
> > +}
> > +
> > +define double @t6(i1 %c, double %a, double %b) nounwind readnone {
> > +entry:
> > +; CHECK: @t6
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: subs w0, w0, #0
> > +; CHECK: fcsel d0, d0, d1, ne
> > +  %0 = select i1 %c, double %a, double %b
> > +  ret double %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fast-isel.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fast-isel.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fast-isel.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fast-isel.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,95 @@
> > +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin |
> FileCheck %s
> > +
> > +define void @t0(i32 %a) nounwind {
> > +entry:
> > +; CHECK: t0
> > +; CHECK: str {{w[0-9]+}}, [sp, #12]
> > +; CHECK-NEXT: ldr [[REGISTER:w[0-9]+]], [sp, #12]
> > +; CHECK-NEXT: str [[REGISTER]], [sp, #12]
> > +; CHECK: ret
> > +  %a.addr = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr
> > +  %tmp = load i32* %a.addr
> > +  store i32 %tmp, i32* %a.addr
> > +  ret void
> > +}
> > +
> > +define void @t1(i64 %a) nounwind {
> > +; CHECK: t1
> > +; CHECK: str {{x[0-9]+}}, [sp, #8]
> > +; CHECK-NEXT: ldr [[REGISTER:x[0-9]+]], [sp, #8]
> > +; CHECK-NEXT: str [[REGISTER]], [sp, #8]
> > +; CHECK: ret
> > +  %a.addr = alloca i64, align 4
> > +  store i64 %a, i64* %a.addr
> > +  %tmp = load i64* %a.addr
> > +  store i64 %tmp, i64* %a.addr
> > +  ret void
> > +}
> > +
> > +define zeroext i1 @i1(i1 %a) nounwind {
> > +entry:
> > +; CHECK: @i1
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: strb w0, [sp, #15]
> > +; CHECK: ldrb w0, [sp, #15]
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: and w0, w0, #0x1
> > +; CHECK: add sp, sp, #16
> > +; CHECK: ret
> > +  %a.addr = alloca i1, align 1
> > +  store i1 %a, i1* %a.addr, align 1
> > +  %0 = load i1* %a.addr, align 1
> > +  ret i1 %0
> > +}
> > +
> > +define i32 @t2(i32 *%ptr) nounwind {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: ldur w0, [x0, #-4]
> > +; CHECK: ret
> > +  %0 = getelementptr i32 *%ptr, i32 -1
> > +  %1 = load i32* %0, align 4
> > +  ret i32 %1
> > +}
> > +
> > +define i32 @t3(i32 *%ptr) nounwind {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: ldur w0, [x0, #-256]
> > +; CHECK: ret
> > +  %0 = getelementptr i32 *%ptr, i32 -64
> > +  %1 = load i32* %0, align 4
> > +  ret i32 %1
> > +}
> > +
> > +define void @t4(i32 *%ptr) nounwind {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: movz w8, #0
> > +; CHECK: stur w8, [x0, #-4]
> > +; CHECK: ret
> > +  %0 = getelementptr i32 *%ptr, i32 -1
> > +  store i32 0, i32* %0, align 4
> > +  ret void
> > +}
> > +
> > +define void @t5(i32 *%ptr) nounwind {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: movz w8, #0
> > +; CHECK: stur w8, [x0, #-256]
> > +; CHECK: ret
> > +  %0 = getelementptr i32 *%ptr, i32 -64
> > +  store i32 0, i32* %0, align 4
> > +  ret void
> > +}
> > +
> > +define void @t6() nounwind {
> > +; CHECK: t6
> > +; CHECK: brk #1
> > +  tail call void @llvm.trap()
> > +  ret void
> > +}
> > +
> > +declare void @llvm.trap() nounwind
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fastcc-tailcall.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fastcc-tailcall.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fastcc-tailcall.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fastcc-tailcall.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,24 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define void @caller(i32* nocapture %p, i32 %a, i32 %b) nounwind optsize
> ssp {
> > +; CHECK-NOT: stp
> > +; CHECK: b       {{_callee|callee}}
> > +; CHECK-NOT: ldp
> > +; CHECK: ret
> > +  %1 = icmp eq i32 %b, 0
> > +  br i1 %1, label %3, label %2
> > +
> > +  tail call fastcc void @callee(i32* %p, i32 %a) optsize
> > +  br label %3
> > +
> > +  ret void
> > +}
> > +
> > +define internal fastcc void @callee(i32* nocapture %p, i32 %a) nounwind
> optsize noinline ssp {
> > +  store volatile i32 %a, i32* %p, align 4, !tbaa !0
> > +  ret void
> > +}
> > +
> > +!0 = metadata !{metadata !"int", metadata !1}
> > +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> > +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll
> (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll Sat
> Mar 29 05:18:08 2014
> > @@ -0,0 +1,18 @@
> > +; fastisel should not fold add with non-pointer bitwidth
> > +; sext(a) + sext(b) != sext(a + b)
> > +; RUN: llc -mtriple=arm64-apple-darwin %s -O0 -o - | FileCheck %s
> > +
> > +define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
> > +entry:
> > +  %ptr.addr = alloca i8*, align 8
> > +  %add = add i8 64, 64 ; 0x40 + 0x40
> > +  %0 = load i8** %ptr.addr, align 8
> > +
> > +  ; CHECK-LABEL: _gep_promotion:
> > +  ; CHECK: ldrb {{[a-z][0-9]+}}, {{\[[a-z][0-9]+\]}}
> > +  %arrayidx = getelementptr inbounds i8* %0, i8 %add
> > +
> > +  %1 = load i8* %arrayidx, align 1
> > +  ret i8 %1
> > +}
> > +
> >
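The comment at the top of that test is the whole point: with an i8 index, add i8 64, 64 wraps to -128 before it is extended, so folding the extension through the add would compute a different address. A standalone C++ illustration of the identity failing, with the 0x40 + 0x40 values taken from the test and everything else assumed:

  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t a = 64, b = 64;
    int64_t folded  = (int8_t)(a + b);           // sext(a + b): 128 wraps to -128
    int64_t widened = (int64_t)a + (int64_t)b;   // sext(a) + sext(b): 128
    assert(folded != widened);                   // so the fold would be unsound
    return 0;
  }
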
> > Added: llvm/trunk/test/CodeGen/ARM64/fcmp-opt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fcmp-opt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fcmp-opt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fcmp-opt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,173 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
> > +; rdar://10263824
> > +
> > +define i1 @fcmp_float1(float %a) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_float1
> > +; CHECK: fcmp s0, #0.0
> > +; CHECK: csinc w0, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, 0.000000e+00
> > +  ret i1 %cmp
> > +}
> > +
> > +define i1 @fcmp_float2(float %a, float %b) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_float2
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w0, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +define i1 @fcmp_double1(double %a) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_double1
> > +; CHECK: fcmp d0, #0.0
> > +; CHECK: csinc w0, wzr, wzr, eq
> > +  %cmp = fcmp une double %a, 0.000000e+00
> > +  ret i1 %cmp
> > +}
> > +
> > +define i1 @fcmp_double2(double %a, double %b) nounwind ssp {
> > +entry:
> > +; CHECK: @fcmp_double2
> > +; CHECK: fcmp d0, d1
> > +; CHECK: csinc w0, wzr, wzr, eq
> > +  %cmp = fcmp une double %a, %b
> > +  ret i1 %cmp
> > +}
> > +
> > +; Check each fcmp condition
> > +define float @fcmp_oeq(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_oeq
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ne
> > +  %cmp = fcmp oeq float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ogt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ogt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, le
> > +  %cmp = fcmp ogt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_oge(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_oge
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, lt
> > +  %cmp = fcmp oge float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_olt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_olt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, pl
> > +  %cmp = fcmp olt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ole(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ole
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, hi
> > +  %cmp = fcmp ole float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ord(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ord
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, vs
> > +  %cmp = fcmp ord float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_uno(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_uno
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, vc
> > +  %cmp = fcmp uno float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ugt(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ugt
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ls
> > +  %cmp = fcmp ugt float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_uge(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_uge
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, mi
> > +  %cmp = fcmp uge float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ult(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ult
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ge
> > +  %cmp = fcmp ult float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_ule(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ule
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, gt
> > +  %cmp = fcmp ule float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +define float @fcmp_une(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_une
> > +; CHECK: fcmp s0, s1
> > +; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
> > +  %cmp = fcmp une float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +; Possible opportunity for improvement.  See comment in
> > +; ARM64TargetLowering::LowerSETCC()
> > +define float @fcmp_one(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_one
> > +;      fcmp    s0, s1
> > +;      orr     w0, wzr, #0x1
> > +;      csel    w1, w0, wzr, mi
> > +;      csel    w0, w0, wzr, gt
> > +  %cmp = fcmp one float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> > +
> > +; Possible opportunity for improvement.  See comment in
> > +; ARM64TargetLowering::LowerSETCC()
> > +define float @fcmp_ueq(float %a, float %b) nounwind ssp {
> > +; CHECK: @fcmp_ueq
> > +; CHECK: fcmp s0, s1
> > +;        orr w0, wzr, #0x1
> > +; CHECK: csel [[REG1:w[0-9]]], [[REG2:w[0-9]+]], wzr, eq
> > +; CHECK: csel {{w[0-9]+}}, [[REG2]], [[REG1]], vs
> > +  %cmp = fcmp ueq float %a, %b
> > +  %conv = uitofp i1 %cmp to float
> > +  ret float %conv
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fcopysign.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fcopysign.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fcopysign.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fcopysign.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,51 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +
> > +; rdar://9332258
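> > +; copysign is lowered to a vector bit-select: movi materializes the
> > +; sign-bit mask and BIT copies only that bit from the second operand.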
> > +
> > +define float @test1(float %x, float %y) nounwind {
> > +entry:
> > +; CHECK-LABEL: test1:
> > +; CHECK: movi.4s       v2, #128, lsl #24
> > +; CHECK: bit.16b       v0, v1, v2
> > +  %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
> > +  ret float %0
> > +}
> > +
> > +define double @test2(double %x, double %y) nounwind {
> > +entry:
> > +; CHECK-LABEL: test2:
> > +; CHECK: movi.2d       v2, #0
> > +; CHECK: fneg.2d       v2, v2
> > +; CHECK: bit.16b       v0, v1, v2
> > +  %0 = tail call double @copysign(double %x, double %y) nounwind
> readnone
> > +  ret double %0
> > +}
> > +
> > +; rdar://9545768
> > +define double @test3(double %a, float %b, float %c) nounwind {
> > +; CHECK-LABEL: test3:
> > +; CHECK: fcvt d1, s1
> > +; CHECK: fneg.2d v2, v{{[0-9]+}}
> > +; CHECK: bit.16b v0, v1, v2
> > +  %tmp1 = fadd float %b, %c
> > +  %tmp2 = fpext float %tmp1 to double
> > +  %tmp = tail call double @copysign( double %a, double %tmp2 ) nounwind
> readnone
> > +  ret double %tmp
> > +}
> > +
> > +define float @test4() nounwind {
> > +entry:
> > +; CHECK-LABEL: test4:
> > +; CHECK: fcvt s0, d0
> > +; CHECK: movi.4s v[[CONST:[0-9]+]], #128, lsl #24
> > +; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
> > +  %0 = tail call double (...)* @bar() nounwind
> > +  %1 = fptrunc double %0 to float
> > +  %2 = tail call float @copysignf(float 5.000000e-01, float %1)
> nounwind readnone
> > +  %3 = fadd float %1, %2
> > +  ret float %3
> > +}
> > +
> > +declare double @bar(...)
> > +declare double @copysign(double, double) nounwind readnone
> > +declare float @copysignf(float, float) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
> (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
> Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,15 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +; DAGCombine to transform a conversion of an extract_vector_elt to an
> > +; extract_vector_elt of a conversion, which saves a round trip of copies
> > +; of the value to a GPR and back to an FPR.
> > +; rdar://11855286
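> > +; With the combine, the fixed-point convert (scvtf #9) runs on the whole
> > +; vector and the lane is moved afterwards, so the value never visits a GPR.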
> > +define double @foo0(<2 x i64> %a) nounwind {
> > +; CHECK:  scvtf.2d  [[REG:v[0-9]+]], v0, #9
> > +; CHECK-NEXT:  ins.d v0[0], [[REG]][1]
> > +  %vecext = extractelement <2 x i64> %a, i32 1
> > +  %fcvt_n = tail call double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64
> %vecext, i32 9)
> > +  ret double %fcvt_n
> > +}
> > +
> > +declare double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind
> readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fmadd.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fmadd.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fmadd.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fmadd.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,74 @@
> > +; RUN: llc -march=arm64 < %s | FileCheck %s
> > +
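> > +; An fmul by -1.0 on an operand or on the result folds into the fused
> > +; forms below (fmsub, fnmadd, fnmsub), so no separate fneg is emitted.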
> > +define float @fma32(float %a, float %b, float %c) nounwind readnone ssp
> {
> > +entry:
> > +; CHECK-LABEL: fma32:
> > +; CHECK: fmadd
> > +  %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
> > +  ret float %0
> > +}
> > +
> > +define float @fnma32(float %a, float %b, float %c) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: fnma32:
> > +; CHECK: fnmadd
> > +  %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
> > +  %mul = fmul float %0, -1.000000e+00
> > +  ret float %mul
> > +}
> > +
> > +define float @fms32(float %a, float %b, float %c) nounwind readnone ssp
> {
> > +entry:
> > +; CHECK-LABEL: fms32:
> > +; CHECK: fmsub
> > +  %mul = fmul float %b, -1.000000e+00
> > +  %0 = tail call float @llvm.fma.f32(float %a, float %mul, float %c)
> > +  ret float %0
> > +}
> > +
> > +define float @fnms32(float %a, float %b, float %c) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: fnms32:
> > +; CHECK: fnmsub
> > +  %mul = fmul float %c, -1.000000e+00
> > +  %0 = tail call float @llvm.fma.f32(float %a, float %b, float %mul)
> > +  ret float %0
> > +}
> > +
> > +define double @fma64(double %a, double %b, double %c) nounwind readnone
> ssp {
> > +; CHECK-LABEL: fma64:
> > +; CHECK: fmadd
> > +entry:
> > +  %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
> > +  ret double %0
> > +}
> > +
> > +define double @fnma64(double %a, double %b, double %c) nounwind
> readnone ssp {
> > +; CHECK-LABEL: fnma64:
> > +; CHECK: fnmadd
> > +entry:
> > +  %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
> > +  %mul = fmul double %0, -1.000000e+00
> > +  ret double %mul
> > +}
> > +
> > +define double @fms64(double %a, double %b, double %c) nounwind readnone
> ssp {
> > +; CHECK-LABEL: fms64:
> > +; CHECK: fmsub
> > +entry:
> > +  %mul = fmul double %b, -1.000000e+00
> > +  %0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
> > +  ret double %0
> > +}
> > +
> > +define double @fnms64(double %a, double %b, double %c) nounwind
> readnone ssp {
> > +; CHECK-LABEL: fnms64:
> > +; CHECK: fnmsub
> > +entry:
> > +  %mul = fmul double %c, -1.000000e+00
> > +  %0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
> > +  ret double %0
> > +}
> > +
> > +declare float @llvm.fma.f32(float, float, float) nounwind readnone
> > +declare double @llvm.fma.f64(double, double, double) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fmax.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fmax.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fmax.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fmax.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
> > +
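> > +; With -enable-no-nans-fp-math the compare+select max/min idiom can be
> > +; matched directly to fmax/fmin.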
> > +define double @test_direct(float %in) #1 {
> > +entry:
> > +  %cmp = fcmp olt float %in, 0.000000e+00
> > +  %longer = fpext float %in to double
> > +  %val = select i1 %cmp, double 0.000000e+00, double %longer
> > +  ret double %val
> > +
> > +; CHECK: fmax
> > +}
> > +
> > +define double @test_cross(float %in) #1 {
> > +entry:
> > +  %cmp = fcmp olt float %in, 0.000000e+00
> > +  %longer = fpext float %in to double
> > +  %val = select i1 %cmp, double %longer, double 0.000000e+00
> > +  ret double %val
> > +
> > +; CHECK: fmin
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fmuladd.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fmuladd.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fmuladd.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fmuladd.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,88 @@
> > +; RUN: llc -asm-verbose=false < %s -march=arm64
> -arm64-neon-syntax=apple | FileCheck %s
> > +
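> > +; llvm.fmuladd is allowed to contract, so each call becomes a single
> > +; fmadd/fmla (two for the types split across two Q registers); the
> > +; CHECK-NOTs verify nothing extra is emitted.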
> > +define float @test_f32(float* %A, float* %B, float* %C) nounwind {
> > +;CHECK-LABEL: test_f32:
> > +;CHECK: fmadd
> > +;CHECK-NOT: fmadd
> > +  %tmp1 = load float* %A
> > +  %tmp2 = load float* %B
> > +  %tmp3 = load float* %C
> > +  %tmp4 = call float @llvm.fmuladd.f32(float %tmp1, float %tmp2, float
> %tmp3)
> > +  ret float %tmp4
> > +}
> > +
> > +define <2 x float> @test_v2f32(<2 x float>* %A, <2 x float>* %B, <2 x
> float>* %C) nounwind {
> > +;CHECK-LABEL: test_v2f32:
> > +;CHECK: fmla.2s
> > +;CHECK-NOT: fmla.2s
> > +  %tmp1 = load <2 x float>* %A
> > +  %tmp2 = load <2 x float>* %B
> > +  %tmp3 = load <2 x float>* %C
> > +  %tmp4 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %tmp1, <2 x
> float> %tmp2, <2 x float> %tmp3)
> > +  ret <2 x float> %tmp4
> > +}
> > +
> > +define <4 x float> @test_v4f32(<4 x float>* %A, <4 x float>* %B, <4 x
> float>* %C) nounwind {
> > +;CHECK-LABEL: test_v4f32:
> > +;CHECK: fmla.4s
> > +;CHECK-NOT: fmla.4s
> > +  %tmp1 = load <4 x float>* %A
> > +  %tmp2 = load <4 x float>* %B
> > +  %tmp3 = load <4 x float>* %C
> > +  %tmp4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x
> float> %tmp2, <4 x float> %tmp3)
> > +  ret <4 x float> %tmp4
> > +}
> > +
> > +define <8 x float> @test_v8f32(<8 x float>* %A, <8 x float>* %B, <8 x
> float>* %C) nounwind {
> > +;CHECK-LABEL: test_v8f32:
> > +;CHECK: fmla.4s
> > +;CHECK: fmla.4s
> > +;CHECK-NOT: fmla.4s
> > +  %tmp1 = load <8 x float>* %A
> > +  %tmp2 = load <8 x float>* %B
> > +  %tmp3 = load <8 x float>* %C
> > +  %tmp4 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %tmp1, <8 x
> float> %tmp2, <8 x float> %tmp3)
> > +  ret <8 x float> %tmp4
> > +}
> > +
> > +define double @test_f64(double* %A, double* %B, double* %C) nounwind {
> > +;CHECK-LABEL: test_f64:
> > +;CHECK: fmadd
> > +;CHECK-NOT: fmadd
> > +  %tmp1 = load double* %A
> > +  %tmp2 = load double* %B
> > +  %tmp3 = load double* %C
> > +  %tmp4 = call double @llvm.fmuladd.f64(double %tmp1, double %tmp2,
> double %tmp3)
> > +  ret double %tmp4
> > +}
> > +
> > +define <2 x double> @test_v2f64(<2 x double>* %A, <2 x double>* %B, <2
> x double>* %C) nounwind {
> > +;CHECK-LABEL: test_v2f64:
> > +;CHECK: fmla.2d
> > +;CHECK-NOT: fmla.2d
> > +  %tmp1 = load <2 x double>* %A
> > +  %tmp2 = load <2 x double>* %B
> > +  %tmp3 = load <2 x double>* %C
> > +  %tmp4 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %tmp1, <2
> x double> %tmp2, <2 x double> %tmp3)
> > +  ret <2 x double> %tmp4
> > +}
> > +
> > +define <4 x double> @test_v4f64(<4 x double>* %A, <4 x double>* %B, <4
> x double>* %C) nounwind {
> > +;CHECK-LABEL: test_v4f64:
> > +;CHECK: fmla.2d
> > +;CHECK: fmla.2d
> > +;CHECK-NOT: fmla.2d
> > +  %tmp1 = load <4 x double>* %A
> > +  %tmp2 = load <4 x double>* %B
> > +  %tmp3 = load <4 x double>* %C
> > +  %tmp4 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %tmp1, <4
> x double> %tmp2, <4 x double> %tmp3)
> > +  ret <4 x double> %tmp4
> > +}
> > +
> > +declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
> > +declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x
> float>) nounwind readnone
> > +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x
> float>) nounwind readnone
> > +declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x
> float>) nounwind readnone
> > +declare double @llvm.fmuladd.f64(double, double, double) nounwind
> readnone
> > +declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2
> x double>) nounwind readnone
> > +declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4
> x double>) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fold-address.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fold-address.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fold-address.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fold-address.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,79 @@
> > +; RUN: llc < %s -O2 -mtriple=arm64-apple-darwin | FileCheck %s
> > +
> > +%0 = type opaque
> > +%struct.CGRect = type { %struct.CGPoint, %struct.CGSize }
> > +%struct.CGPoint = type { double, double }
> > +%struct.CGSize = type { double, double }
> > +
> > +@"OBJC_IVAR_$_UIScreen._bounds" = external hidden global i64, section
> "__DATA, __objc_ivar", align 8
> > +
> > +define hidden %struct.CGRect @nofold(%0* nocapture %self, i8* nocapture
> %_cmd) nounwind readonly optsize ssp {
> > +entry:
> > +; CHECK-LABEL: nofold:
> > +; CHECK: add x[[REG:[0-9]+]], x0, x{{[0-9]+}}
> > +; CHECK: ldp d0, d1, [x[[REG]]]
> > +; CHECK: ldp d2, d3, [x[[REG]], #16]
> > +; CHECK: ret
> > +  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8,
> !invariant.load !4
> > +  %0 = bitcast %0* %self to i8*
> > +  %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
> > +  %add.ptr10.0 = bitcast i8* %add.ptr to double*
> > +  %tmp11 = load double* %add.ptr10.0, align 8
> > +  %add.ptr.sum = add i64 %ivar, 8
> > +  %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
> > +  %1 = bitcast i8* %add.ptr10.1 to double*
> > +  %tmp12 = load double* %1, align 8
> > +  %add.ptr.sum17 = add i64 %ivar, 16
> > +  %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17
> > +  %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
> > +  %tmp = load double* %add.ptr4.1.0, align 8
> > +  %add.ptr4.1.sum = add i64 %ivar, 24
> > +  %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum
> > +  %2 = bitcast i8* %add.ptr4.1.1 to double*
> > +  %tmp5 = load double* %2, align 8
> > +  %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
> > +  %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
> > +  %insert = insertvalue %struct.CGRect undef, %struct.CGPoint
> %insert16, 0
> > +  %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0
> > +  %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1
> > +  %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize
> %insert9, 1
> > +  ret %struct.CGRect %insert3
> > +}
> > +
> > +define hidden %struct.CGRect @fold(%0* nocapture %self, i8* nocapture
> %_cmd) nounwind readonly optsize ssp {
> > +entry:
> > +; CHECK-LABEL: fold:
> > +; CHECK: ldr d0, [x0, x{{[0-9]+}}]
> > +; CHECK-NOT: add x0, x0, x1
> > +; CHECK: ret
> > +  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8,
> !invariant.load !4
> > +  %0 = bitcast %0* %self to i8*
> > +  %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
> > +  %add.ptr10.0 = bitcast i8* %add.ptr to double*
> > +  %tmp11 = load double* %add.ptr10.0, align 8
> > +  %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %ivar
> > +  %1 = bitcast i8* %add.ptr10.1 to double*
> > +  %tmp12 = load double* %1, align 8
> > +  %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %ivar
> > +  %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
> > +  %tmp = load double* %add.ptr4.1.0, align 8
> > +  %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %ivar
> > +  %2 = bitcast i8* %add.ptr4.1.1 to double*
> > +  %tmp5 = load double* %2, align 8
> > +  %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
> > +  %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
> > +  %insert = insertvalue %struct.CGRect undef, %struct.CGPoint
> %insert16, 0
> > +  %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0
> > +  %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1
> > +  %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize
> %insert9, 1
> > +  ret %struct.CGRect %insert3
> > +}
> > +
> > +
> > +!llvm.module.flags = !{!0, !1, !2, !3}
> > +
> > +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
> > +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32
> 0}
> > +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section",
> metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
> > +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32
> 0}
> > +!4 = metadata !{}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fold-lsl.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fold-lsl.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fold-lsl.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fold-lsl.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,79 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +;
> > +; <rdar://problem/14486451>
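> > +; The lshr+and index computation should become a single ubfm (bitfield
> > +; extract), and the element-size scaling should fold into the load/store
> > +; addressing mode as "lsl #1/#2/#3".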
> > +
> > +%struct.a = type [256 x i16]
> > +%struct.b = type [256 x i32]
> > +%struct.c = type [256 x i64]
> > +
> > +define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
> > +; CHECK-LABEL: load_halfword:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: ldrh w0, [x0, [[REG]], lsl #1]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64
> %idxprom83
> > +  %result = load i16* %arrayidx86, align 2
> > +  ret i16 %result
> > +}
> > +
> > +define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
> > +; CHECK-LABEL: load_word:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: ldr w0, [x0, [[REG]], lsl #2]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64
> %idxprom83
> > +  %result = load i32* %arrayidx86, align 4
> > +  ret i32 %result
> > +}
> > +
> > +define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
> > +; CHECK-LABEL: load_doubleword:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: ldr x0, [x0, [[REG]], lsl #3]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64
> %idxprom83
> > +  %result = load i64* %arrayidx86, align 8
> > +  ret i64 %result
> > +}
> > +
> > +define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val)
> nounwind {
> > +; CHECK-LABEL: store_halfword:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: strh w2, [x0, [[REG]], lsl #1]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64
> %idxprom83
> > +  store i16 %val, i16* %arrayidx86, align 8
> > +  ret void
> > +}
> > +
> > +define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind
> {
> > +; CHECK-LABEL: store_word:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: str w2, [x0, [[REG]], lsl #2]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64
> %idxprom83
> > +  store i32 %val, i32* %arrayidx86, align 8
> > +  ret void
> > +}
> > +
> > +define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val)
> nounwind {
> > +; CHECK-LABEL: store_doubleword:
> > +; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
> > +; CHECK: str x2, [x0, [[REG]], lsl #3]
> > +  %shr81 = lshr i32 %xor72, 9
> > +  %conv82 = zext i32 %shr81 to i64
> > +  %idxprom83 = and i64 %conv82, 255
> > +  %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64
> %idxprom83
> > +  store i64 %val, i64* %arrayidx86, align 8
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fp-imm.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fp-imm.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fp-imm.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fp-imm.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,21 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +
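> > +; Neither constant fits the 8-bit fmov immediate encoding, so each is
> > +; placed in a literal pool and loaded with an adrp/ldr pair.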
> > +; CHECK: literal8
> > +; CHECK: .quad  4614256656552045848
> > +define double @foo() {
> > +; CHECK: _foo:
> > +; CHECK: adrp x[[REG:[0-9]+]], lCPI0_0@PAGE
> > +; CHECK: ldr  d0, [x[[REG]], lCPI0_0@PAGEOFF]
> > +; CHECK-NEXT: ret
> > +  ret double 0x400921FB54442D18
> > +}
> > +
> > +; CHECK: literal4
> > +; CHECK: .long 1078530011
> > +define float @bar() {
> > +; CHECK: _bar:
> > +; CHECK:  adrp  x[[REG:[0-9]+]], lCPI1_0@PAGE
> > +; CHECK:  ldr s0, [x[[REG]], lCPI1_0@PAGEOFF]
> > +; CHECK-NEXT:  ret
> > +  ret float 0x400921FB60000000
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,8 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define float @t1(i1 %a, float %b, float %c) nounwind {
> > +; CHECK: t1
> > +; CHECK: fcsel s0, s0, s1, ne
> > +  %sel = select i1 %a, float %b, float %c
> > +  ret float %sel
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fp128-folding.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fp128-folding.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fp128-folding.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fp128-folding.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc -march=arm64 -verify-machineinstrs < %s | FileCheck %s
> > +declare void @bar(i8*, i8*, i32*)
> > +
> > +; SelectionDAG used to try to fold some fp128 operations using the
> ppcf128 type,
> > +; which is not supported.
> > +
> > +define fp128 @test_folding() {
> > +; CHECK-LABEL: test_folding:
> > +  %l = alloca i32
> > +  store i32 42, i32* %l
> > +  %val = load i32* %l
> > +  %fpval = sitofp i32 %val to fp128
> > +  ; If the value is loaded from a constant pool into an fp128, it's
> been folded
> > +  ; successfully.
> > +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}},
> > +  ret fp128 %fpval
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/fp128.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/fp128.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/fp128.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/fp128.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,274 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs < %s |
> FileCheck %s
> > +
> > +@lhs = global fp128 zeroinitializer, align 16
> > +@rhs = global fp128 zeroinitializer, align 16
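> > +; AArch64 has no fp128 arithmetic instructions, so every operation below
> > +; lowers to a soft-float library call (__addtf3 and friends); the q
> > +; registers are only used to pass and return the values.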
> > +
> > +define fp128 @test_add() {
> > +; CHECK-LABEL: test_add:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  %val = fadd fp128 %lhs, %rhs
> > +; CHECK: bl __addtf3
> > +  ret fp128 %val
> > +}
> > +
> > +define fp128 @test_sub() {
> > +; CHECK-LABEL: test_sub:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  %val = fsub fp128 %lhs, %rhs
> > +; CHECK: bl __subtf3
> > +  ret fp128 %val
> > +}
> > +
> > +define fp128 @test_mul() {
> > +; CHECK-LABEL: test_mul:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  %val = fmul fp128 %lhs, %rhs
> > +; CHECK: bl __multf3
> > +  ret fp128 %val
> > +}
> > +
> > +define fp128 @test_div() {
> > +; CHECK-LABEL: test_div:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  %val = fdiv fp128 %lhs, %rhs
> > +; CHECK: bl __divtf3
> > +  ret fp128 %val
> > +}
> > +
> > +@var32 = global i32 0
> > +@var64 = global i64 0
> > +
> > +define void @test_fptosi() {
> > +; CHECK-LABEL: test_fptosi:
> > +  %val = load fp128* @lhs, align 16
> > +
> > +  %val32 = fptosi fp128 %val to i32
> > +  store i32 %val32, i32* @var32
> > +; CHECK: bl __fixtfsi
> > +
> > +  %val64 = fptosi fp128 %val to i64
> > +  store i64 %val64, i64* @var64
> > +; CHECK: bl __fixtfdi
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_fptoui() {
> > +; CHECK-LABEL: test_fptoui:
> > +  %val = load fp128* @lhs, align 16
> > +
> > +  %val32 = fptoui fp128 %val to i32
> > +  store i32 %val32, i32* @var32
> > +; CHECK: bl __fixunstfsi
> > +
> > +  %val64 = fptoui fp128 %val to i64
> > +  store i64 %val64, i64* @var64
> > +; CHECK: bl __fixunstfdi
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_sitofp() {
> > +; CHECK-LABEL: test_sitofp:
> > +
> > +  %src32 = load i32* @var32
> > +  %val32 = sitofp i32 %src32 to fp128
> > +  store volatile fp128 %val32, fp128* @lhs
> > +; CHECK: bl __floatsitf
> > +
> > +  %src64 = load i64* @var64
> > +  %val64 = sitofp i64 %src64 to fp128
> > +  store volatile fp128 %val64, fp128* @lhs
> > +; CHECK: bl __floatditf
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_uitofp() {
> > +; CHECK-LABEL: test_uitofp:
> > +
> > +  %src32 = load i32* @var32
> > +  %val32 = uitofp i32 %src32 to fp128
> > +  store volatile fp128 %val32, fp128* @lhs
> > +; CHECK: bl __floatunsitf
> > +
> > +  %src64 = load i64* @var64
> > +  %val64 = uitofp i64 %src64 to fp128
> > +  store volatile fp128 %val64, fp128* @lhs
> > +; CHECK: bl __floatunditf
> > +
> > +  ret void
> > +}
> > +
> > +define i1 @test_setcc1() {
> > +; CHECK-LABEL: test_setcc1:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +; Technically, everything after the call to __letf2 is redundant, but
> we'll let
> > +; LLVM have its fun for now.
> > +  %val = fcmp ole fp128 %lhs, %rhs
> > +; CHECK: bl __letf2
> > +; CHECK: cmp w0, #0
> > +; CHECK: csinc w0, wzr, wzr, gt
> > +
> > +  ret i1 %val
> > +; CHECK: ret
> > +}
> > +
> > +define i1 @test_setcc2() {
> > +; CHECK-LABEL: test_setcc2:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  %val = fcmp ugt fp128 %lhs, %rhs
> > +; CHECK: bl      __gttf2
> > +; CHECK: cmp     w0, #0
> > +; CHECK: csinc   [[GT:w[0-9]+]], wzr, wzr, le
> > +
> > +; CHECK: bl      __unordtf2
> > +; CHECK: cmp     w0, #0
> > +; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
> > +; CHECK: orr     w0, [[UNORDERED]], [[GT]]
> > +
> > +  ret i1 %val
> > +; CHECK: ret
> > +}
> > +
> > +define i32 @test_br_cc() {
> > +; CHECK-LABEL: test_br_cc:
> > +
> > +  %lhs = load fp128* @lhs, align 16
> > +  %rhs = load fp128* @rhs, align 16
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
> > +; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
> > +
> > +  ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
> > +  %cond = fcmp olt fp128 %lhs, %rhs
> > +; CHECK: bl      __getf2
> > +; CHECK: cmp     w0, #0
> > +; CHECK: csinc   [[OGE:w[0-9]+]], wzr, wzr, lt
> > +
> > +; CHECK: bl      __unordtf2
> > +; CHECK: cmp     w0, #0
> > +; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
> > +
> > +; CHECK: orr     [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
> > +; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
> > +  br i1 %cond, label %iftrue, label %iffalse
> > +
> > +iftrue:
> > +  ret i32 42
> > +; CHECK-NEXT: BB#
> > +; CHECK-NEXT: movz w0, #42
> > +; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
> > +
> > +iffalse:
> > +  ret i32 29
> > +; CHECK: [[RET29]]:
> > +; CHECK-NEXT: movz w0, #29
> > +; CHECK-NEXT: [[REALRET]]:
> > +; CHECK: ret
> > +}
> > +
> > +define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
> > +; CHECK-LABEL: test_select:
> > +
> > +  %val = select i1 %cond, fp128 %lhs, fp128 %rhs
> > +  store fp128 %val, fp128* @lhs, align 16
> > +; CHECK: and [[BIT:w[0-9]+]], w0, #0x1
> > +; CHECK: cmp [[BIT]], #0
> > +; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
> > +; CHECK-NEXT: BB#
> > +; CHECK-NEXT: orr v[[VAL:[0-9]+]].16b, v0.16b, v0.16b
> > +; CHECK-NEXT: [[IFFALSE]]:
> > +; CHECK: str q[[VAL]], [{{x[0-9]+}}, :lo12:lhs]
> > +  ret void
> > +; CHECK: ret
> > +}
> > +
> > +@varfloat = global float 0.0, align 4
> > +@vardouble = global double 0.0, align 8
> > +
> > +define void @test_round() {
> > +; CHECK-LABEL: test_round:
> > +
> > +  %val = load fp128* @lhs, align 16
> > +
> > +  %float = fptrunc fp128 %val to float
> > +  store float %float, float* @varfloat, align 4
> > +; CHECK: bl __trunctfsf2
> > +; CHECK: str s0, [{{x[0-9]+}}, :lo12:varfloat]
> > +
> > +  %double = fptrunc fp128 %val to double
> > +  store double %double, double* @vardouble, align 8
> > +; CHECK: bl __trunctfdf2
> > +; CHECK: str d0, [{{x[0-9]+}}, :lo12:vardouble]
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_extend() {
> > +; CHECK-LABEL: test_extend:
> > +
> > +  %val = load fp128* @lhs, align 16
> > +
> > +  %float = load float* @varfloat
> > +  %fromfloat = fpext float %float to fp128
> > +  store volatile fp128 %fromfloat, fp128* @lhs, align 16
> > +; CHECK: bl __extendsftf2
> > +; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
> > +
> > +  %double = load double* @vardouble
> > +  %fromdouble = fpext double %double to fp128
> > +  store volatile fp128 %fromdouble, fp128* @lhs, align 16
> > +; CHECK: bl __extenddftf2
> > +; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
> > +
> > +  ret void
> > +; CHECK: ret
> > +}
> > +
> > +define fp128 @test_neg(fp128 %in) {
> > +; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
> > +; Make sure the weird hex constant below *is* -0.0
> > +; CHECK-NEXT: fp128 -0
> > +
> > +; CHECK-LABEL: test_neg:
> > +
> > +  ; Could in principle be optimized to fneg, which we can't select; this
> > +  ; makes sure that doesn't happen.
> > +  %ret = fsub fp128 0xL00000000000000008000000000000000, %in
> > +; CHECK: orr v1.16b, v0.16b, v0.16b
> > +; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[MINUS0]]]
> > +; CHECK: bl __subtf3
> > +
> > +  ret fp128 %ret
> > +; CHECK: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/frame-index.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/frame-index.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/frame-index.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/frame-index.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +; rdar://11935841
> > +
> > +define void @t1() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK-NOT: add x{{[0-9]+}}, sp
> > +; CHECK: stp x28, x27, [sp, #-16]!
> > +  %v = alloca [288 x i32], align 4
> > +  unreachable
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/frameaddr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/frameaddr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/frameaddr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/frameaddr.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,15 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define i8* @t() nounwind {
> > +entry:
> > +; CHECK-LABEL: t:
> > +; CHECK: stp fp, lr, [sp, #-16]!
> > +; CHECK: mov fp, sp
> > +; CHECK: mov x0, fp
> > +; CHECK: ldp fp, lr, [sp], #16
> > +; CHECK: ret
> > +       %0 = call i8* @llvm.frameaddress(i32 0)
> > +        ret i8* %0
> > +}
> > +
> > +declare i8* @llvm.frameaddress(i32) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/global-address.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/global-address.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/global-address.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/global-address.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,14 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
> > +; rdar://9618644
> > +
> > +@G = external global i32
> > +
> > +define i32 @test(i32 %off) nounwind {
> > +; CHECK-LABEL: test:
> > +; CHECK: adrp x[[REG:[0-9]+]], _G@GOTPAGE
> > +; CHECK: ldr  x[[REG2:[0-9]+]], [x[[REG]], _G@GOTPAGEOFF]
> > +; CHECK: add w0, w[[REG2]], w0
> > +  %tmp = ptrtoint i32* @G to i32
> > +  %tmp1 = add i32 %tmp, %off
> > +  ret i32 %tmp1
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/hello.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/hello.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/hello.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/hello.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,38 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
> > +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
> --check-prefix=CHECK-LINUX
> > +
> > +; CHECK-LABEL: main:
> > +; CHECK:       stp     fp, lr, [sp, #-16]!
> > +; CHECK-NEXT:  mov     fp, sp
> > +; CHECK-NEXT:  sub     sp, sp, #16
> > +; CHECK-NEXT:  stur    wzr, [fp, #-4]
> > +; CHECK:       adrp    x0, L_.str@PAGE
> > +; CHECK:       add     x0, x0, L_.str@PAGEOFF
> > +; CHECK-NEXT:  bl      _puts
> > +; CHECK-NEXT:  mov     sp, fp
> > +; CHECK-NEXT:  ldp     fp, lr, [sp], #16
> > +; CHECK-NEXT:  ret
> > +
> > +; CHECK-LINUX-LABEL: main:
> > +; CHECK-LINUX: stp     fp, lr, [sp, #-16]!
> > +; CHECK-LINUX-NEXT:    mov     fp, sp
> > +; CHECK-LINUX-NEXT:    sub     sp, sp, #16
> > +; CHECK-LINUX-NEXT:    stur    wzr, [fp, #-4]
> > +; CHECK-LINUX: adrp    x0, .L.str
> > +; CHECK-LINUX: add     x0, x0, :lo12:.L.str
> > +; CHECK-LINUX-NEXT:    bl      puts
> > +; CHECK-LINUX-NEXT:    mov     sp, fp
> > +; CHECK-LINUX-NEXT:    ldp     fp, lr, [sp], #16
> > +; CHECK-LINUX-NEXT:    ret
> > +
> > +@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
> > +
> > +define i32 @main() nounwind ssp {
> > +entry:
> > +  %retval = alloca i32, align 4
> > +  store i32 0, i32* %retval
> > +  %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str,
> i32 0, i32 0))
> > +  ret i32 %call
> > +}
> > +
> > +declare i32 @puts(i8*)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/i16-subreg-extract.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/i16-subreg-extract.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/i16-subreg-extract.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/i16-subreg-extract.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,12 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define i32 @foo(<4 x i16>* %__a) nounwind {
> > +; CHECK-LABEL: foo:
> > +; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
> > +  %tmp18 = load <4 x i16>* %__a, align 8
> > +  %vget_lane = extractelement <4 x i16> %tmp18, i32 0
> > +  %conv = zext i16 %vget_lane to i32
> > +  %mul = mul nsw i32 3, %conv
> > +  ret i32 %mul
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/icmp-opt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/icmp-opt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/icmp-opt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/icmp-opt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +; Optimize (x > -1) to (x >= 0) etc.
> > +; Optimize (cmp (add / sub), 0): eliminate the subs used to update flags
> > +;   for comparison only
> > +; rdar://10233472
> > +
> > +define i32 @t1(i64 %a) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK-NOT: movn
> > +; CHECK: cmp  x0, #0
> > +; CHECK: csinc w0, wzr, wzr, lt
> > +  %cmp = icmp sgt i64 %a, -1
> > +  %conv = zext i1 %cmp to i32
> > +  ret i32 %conv
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/illegal-float-ops.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/illegal-float-ops.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/illegal-float-ops.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/illegal-float-ops.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,247 @@
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s |
> FileCheck %s
> > +
> > +@varfloat = global float 0.0
> > +@vardouble = global double 0.0
> > +@varfp128 = global fp128 zeroinitializer
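> > +; None of these operations have AArch64 instructions for these types, so
> > +; they must all be lowered to library calls.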
> > +
> > +declare float @llvm.cos.f32(float)
> > +declare double @llvm.cos.f64(double)
> > +declare fp128 @llvm.cos.f128(fp128)
> > +
> > +define void @test_cos(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_cos:
> > +
> > +   %cosfloat = call float @llvm.cos.f32(float %float)
> > +   store float %cosfloat, float* @varfloat
> > +; CHECK: bl cosf
> > +
> > +   %cosdouble = call double @llvm.cos.f64(double %double)
> > +   store double %cosdouble, double* @vardouble
> > +; CHECK: bl cos
> > +
> > +   %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
> > +   store fp128 %cosfp128, fp128* @varfp128
> > +; CHECK: bl cosl
> > +
> > +  ret void
> > +}
> > +
> > +declare float @llvm.exp.f32(float)
> > +declare double @llvm.exp.f64(double)
> > +declare fp128 @llvm.exp.f128(fp128)
> > +
> > +define void @test_exp(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_exp:
> > +
> > +   %expfloat = call float @llvm.exp.f32(float %float)
> > +   store float %expfloat, float* @varfloat
> > +; CHECK: bl expf
> > +
> > +   %expdouble = call double @llvm.exp.f64(double %double)
> > +   store double %expdouble, double* @vardouble
> > +; CHECK: bl exp
> > +
> > +   %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
> > +   store fp128 %expfp128, fp128* @varfp128
> > +; CHECK: bl expl
> > +
> > +  ret void
> > +}
> > +
> > +declare float @llvm.exp2.f32(float)
> > +declare double @llvm.exp2.f64(double)
> > +declare fp128 @llvm.exp2.f128(fp128)
> > +
> > +define void @test_exp2(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_exp2:
> > +
> > +   %exp2float = call float @llvm.exp2.f32(float %float)
> > +   store float %exp2float, float* @varfloat
> > +; CHECK: bl exp2f
> > +
> > +   %exp2double = call double @llvm.exp2.f64(double %double)
> > +   store double %exp2double, double* @vardouble
> > +; CHECK: bl exp2
> > +
> > +   %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
> > +   store fp128 %exp2fp128, fp128* @varfp128
> > +; CHECK: bl exp2l
> > +  ret void
> > +
> > +}
> > +
> > +declare float @llvm.log.f32(float)
> > +declare double @llvm.log.f64(double)
> > +declare fp128 @llvm.log.f128(fp128)
> > +
> > +define void @test_log(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_log:
> > +
> > +   %logfloat = call float @llvm.log.f32(float %float)
> > +   store float %logfloat, float* @varfloat
> > +; CHECK: bl logf
> > +
> > +   %logdouble = call double @llvm.log.f64(double %double)
> > +   store double %logdouble, double* @vardouble
> > +; CHECK: bl log
> > +
> > +   %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
> > +   store fp128 %logfp128, fp128* @varfp128
> > +; CHECK: bl logl
> > +
> > +  ret void
> > +}
> > +
> > +declare float @llvm.log2.f32(float)
> > +declare double @llvm.log2.f64(double)
> > +declare fp128 @llvm.log2.f128(fp128)
> > +
> > +define void @test_log2(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_log2:
> > +
> > +   %log2float = call float @llvm.log2.f32(float %float)
> > +   store float %log2float, float* @varfloat
> > +; CHECK: bl log2f
> > +
> > +   %log2double = call double @llvm.log2.f64(double %double)
> > +   store double %log2double, double* @vardouble
> > +; CHECK: bl log2
> > +
> > +   %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
> > +   store fp128 %log2fp128, fp128* @varfp128
> > +; CHECK: bl log2l
> > +  ret void
> > +
> > +}
> > +
> > +declare float @llvm.log10.f32(float)
> > +declare double @llvm.log10.f64(double)
> > +declare fp128 @llvm.log10.f128(fp128)
> > +
> > +define void @test_log10(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_log10:
> > +
> > +   %log10float = call float @llvm.log10.f32(float %float)
> > +   store float %log10float, float* @varfloat
> > +; CHECK: bl log10f
> > +
> > +   %log10double = call double @llvm.log10.f64(double %double)
> > +   store double %log10double, double* @vardouble
> > +; CHECK: bl log10
> > +
> > +   %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
> > +   store fp128 %log10fp128, fp128* @varfp128
> > +; CHECK: bl log10l
> > +
> > +  ret void
> > +}
> > +
> > +declare float @llvm.sin.f32(float)
> > +declare double @llvm.sin.f64(double)
> > +declare fp128 @llvm.sin.f128(fp128)
> > +
> > +define void @test_sin(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_sin:
> > +
> > +   %sinfloat = call float @llvm.sin.f32(float %float)
> > +   store float %sinfloat, float* @varfloat
> > +; CHECK: bl sinf
> > +
> > +   %sindouble = call double @llvm.sin.f64(double %double)
> > +   store double %sindouble, double* @vardouble
> > +; CHECK: bl sin
> > +
> > +   %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
> > +   store fp128 %sinfp128, fp128* @varfp128
> > +; CHECK: bl sinl
> > +  ret void
> > +
> > +}
> > +
> > +declare float @llvm.pow.f32(float, float)
> > +declare double @llvm.pow.f64(double, double)
> > +declare fp128 @llvm.pow.f128(fp128, fp128)
> > +
> > +define void @test_pow(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_pow:
> > +
> > +   %powfloat = call float @llvm.pow.f32(float %float, float %float)
> > +   store float %powfloat, float* @varfloat
> > +; CHECK: bl powf
> > +
> > +   %powdouble = call double @llvm.pow.f64(double %double, double
> %double)
> > +   store double %powdouble, double* @vardouble
> > +; CHECK: bl pow
> > +
> > +   %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
> > +   store fp128 %powfp128, fp128* @varfp128
> > +; CHECK: bl powl
> > +
> > +  ret void
> > +}
> > +
> > +declare float @llvm.powi.f32(float, i32)
> > +declare double @llvm.powi.f64(double, i32)
> > +declare fp128 @llvm.powi.f128(fp128, i32)
> > +
> > +define void @test_powi(float %float, double %double, i32 %exponent,
> fp128 %fp128) {
> > +; CHECK-LABEL: test_powi:
> > +
> > +   %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent)
> > +   store float %powifloat, float* @varfloat
> > +; CHECK: bl __powisf2
> > +
> > +   %powidouble = call double @llvm.powi.f64(double %double, i32
> %exponent)
> > +   store double %powidouble, double* @vardouble
> > +; CHECK: bl __powidf2
> > +
> > +   %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent)
> > +   store fp128 %powifp128, fp128* @varfp128
> > +; CHECK: bl __powitf2
> > +  ret void
> > +
> > +}
> > +
> > +define void @test_frem(float %float, double %double, fp128 %fp128) {
> > +; CHECK-LABEL: test_frem:
> > +
> > +  %fremfloat = frem float %float, %float
> > +  store float %fremfloat, float* @varfloat
> > +; CHECK: bl fmodf
> > +
> > +  %fremdouble = frem double %double, %double
> > +  store double %fremdouble, double* @vardouble
> > +; CHECK: bl fmod
> > +
> > +  %fremfp128 = frem fp128 %fp128, %fp128
> > +  store fp128 %fremfp128, fp128* @varfp128
> > +; CHECK: bl fmodl
> > +
> > +  ret void
> > +}
> > +
> > +declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
> > +
> > +define void @test_fma(fp128 %fp128) {
> > +; CHECK-LABEL: test_fma:
> > +
> > +  %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128,
> fp128 %fp128)
> > +  store fp128 %fmafp128, fp128* @varfp128
> > +; CHECK: bl fmal
> > +
> > +  ret void
> > +}
> > +
> > +declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
> > +
> > +define void @test_fmuladd(fp128 %fp128) {
> > +; CHECK-LABEL: test_fmuladd:
> > +
> > +  %fmuladdfp128 = call fp128 @llvm.fmuladd.f128(fp128 %fp128, fp128
> %fp128, fp128 %fp128)
> > +  store fp128 %fmuladdfp128, fp128* @varfp128
> > +; CHECK-NOT: bl fmal
> > +; CHECK: bl __multf3
> > +; CHECK: bl __addtf3
> > +
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/indexed-memory.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/indexed-memory.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/indexed-memory.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/indexed-memory.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,351 @@
> > +; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s
> > +
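> > +; The store-then-advance-pointer pattern should use post-indexed
> > +; addressing: "str xN, [xM], #imm" stores and then bumps the base by the
> > +; element size in one instruction.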
> > +define void @store64(i64** nocapture %out, i64 %index, i64 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: store64:
> > +; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8
> > +; CHECK: ret
> > +  %tmp = load i64** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i64* %tmp, i64 1
> > +  store i64 %spacing, i64* %tmp, align 4
> > +  store i64* %incdec.ptr, i64** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @store32(i32** nocapture %out, i32 %index, i32 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: store32:
> > +; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
> > +; CHECK: ret
> > +  %tmp = load i32** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
> > +  store i32 %spacing, i32* %tmp, align 4
> > +  store i32* %incdec.ptr, i32** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @store16(i16** nocapture %out, i16 %index, i16 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: store16:
> > +; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
> > +; CHECK: ret
> > +  %tmp = load i16** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
> > +  store i16 %spacing, i16* %tmp, align 4
> > +  store i16* %incdec.ptr, i16** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @store8(i8** nocapture %out, i8 %index, i8 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: store8:
> > +; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
> > +; CHECK: ret
> > +  %tmp = load i8** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
> > +  store i8 %spacing, i8* %tmp, align 4
> > +  store i8* %incdec.ptr, i8** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @truncst64to32(i32** nocapture %out, i32 %index, i64
> %spacing) nounwind noinline ssp {
> > +; CHECK-LABEL: truncst64to32:
> > +; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
> > +; CHECK: ret
> > +  %tmp = load i32** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
> > +  %trunc = trunc i64 %spacing to i32
> > +  store i32 %trunc, i32* %tmp, align 4
> > +  store i32* %incdec.ptr, i32** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @truncst64to16(i16** nocapture %out, i16 %index, i64
> %spacing) nounwind noinline ssp {
> > +; CHECK-LABEL: truncst64to16:
> > +; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
> > +; CHECK: ret
> > +  %tmp = load i16** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
> > +  %trunc = trunc i64 %spacing to i16
> > +  store i16 %trunc, i16* %tmp, align 4
> > +  store i16* %incdec.ptr, i16** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: truncst64to8:
> > +; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
> > +; CHECK: ret
> > +  %tmp = load i8** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
> > +  %trunc = trunc i64 %spacing to i8
> > +  store i8 %trunc, i8* %tmp, align 4
> > +  store i8* %incdec.ptr, i8** %out, align 8
> > +  ret void
> > +}
> > +
> > +
> > +define void @storef32(float** nocapture %out, float %index, float
> %spacing) nounwind noinline ssp {
> > +; CHECK-LABEL: storef32:
> > +; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
> > +; CHECK: ret
> > +  %tmp = load float** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds float* %tmp, i64 1
> > +  store float %spacing, float* %tmp, align 4
> > +  store float* %incdec.ptr, float** %out, align 8
> > +  ret void
> > +}
> > +
> > +define void @storef64(double** nocapture %out, double %index, double
> %spacing) nounwind noinline ssp {
> > +; CHECK-LABEL: storef64:
> > +; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8
> > +; CHECK: ret
> > +  %tmp = load double** %out, align 8
> > +  %incdec.ptr = getelementptr inbounds double* %tmp, i64 1
> > +  store double %spacing, double* %tmp, align 4
> > +  store double* %incdec.ptr, double** %out, align 8
> > +  ret void
> > +}
> > +
> > +define double * @pref64(double** nocapture %out, double %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: pref64:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: str     d0, [x0, #32]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load double** %out, align 8
> > +  %ptr = getelementptr inbounds double* %tmp, i64 4
> > +  store double %spacing, double* %ptr, align 4
> > +  ret double *%ptr
> > +}
> > +
> > +define float * @pref32(float** nocapture %out, float %spacing) nounwind
> noinline ssp {
> > +; CHECK-LABEL: pref32:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: str     s0, [x0, #12]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load float** %out, align 8
> > +  %ptr = getelementptr inbounds float* %tmp, i64 3
> > +  store float %spacing, float* %ptr, align 4
> > +  ret float *%ptr
> > +}
> > +
> > +define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind
> noinline ssp {
> > +; CHECK-LABEL: pre64:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: str     x1, [x0, #16]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i64** %out, align 8
> > +  %ptr = getelementptr inbounds i64* %tmp, i64 2
> > +  store i64 %spacing, i64* %ptr, align 4
> > +  ret i64 *%ptr
> > +}
> > +
> > +define i32 * @pre32(i32** nocapture %out, i32 %spacing) nounwind
> noinline ssp {
> > +; CHECK-LABEL: pre32:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: str     w1, [x0, #8]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i32** %out, align 8
> > +  %ptr = getelementptr inbounds i32* %tmp, i64 2
> > +  store i32 %spacing, i32* %ptr, align 4
> > +  ret i32 *%ptr
> > +}
> > +
> > +define i16 * @pre16(i16** nocapture %out, i16 %spacing) nounwind
> noinline ssp {
> > +; CHECK-LABEL: pre16:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: strh    w1, [x0, #4]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i16** %out, align 8
> > +  %ptr = getelementptr inbounds i16* %tmp, i64 2
> > +  store i16 %spacing, i16* %ptr, align 4
> > +  ret i16 *%ptr
> > +}
> > +
> > +define i8 * @pre8(i8** nocapture %out, i8 %spacing) nounwind noinline
> ssp {
> > +; CHECK-LABEL: pre8:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: strb    w1, [x0, #2]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i8** %out, align 8
> > +  %ptr = getelementptr inbounds i8* %tmp, i64 2
> > +  store i8 %spacing, i8* %ptr, align 4
> > +  ret i8 *%ptr
> > +}
> > +
> > +define i32 * @pretrunc64to32(i32** nocapture %out, i64 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: pretrunc64to32:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: str     w1, [x0, #8]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i32** %out, align 8
> > +  %ptr = getelementptr inbounds i32* %tmp, i64 2
> > +  %trunc = trunc i64 %spacing to i32
> > +  store i32 %trunc, i32* %ptr, align 4
> > +  ret i32 *%ptr
> > +}
> > +
> > +define i16 * @pretrunc64to16(i16** nocapture %out, i64 %spacing)
> nounwind noinline ssp {
> > +; CHECK-LABEL: pretrunc64to16:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: strh    w1, [x0, #4]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i16** %out, align 8
> > +  %ptr = getelementptr inbounds i16* %tmp, i64 2
> > +  %trunc = trunc i64 %spacing to i16
> > +  store i16 %trunc, i16* %ptr, align 4
> > +  ret i16 *%ptr
> > +}
> > +
> > +define i8 * @pretrunc64to8(i8** nocapture %out, i64 %spacing) nounwind
> noinline ssp {
> > +; CHECK-LABEL: pretrunc64to8:
> > +; CHECK: ldr     x0, [x0]
> > +; CHECK-NEXT: strb    w1, [x0, #2]!
> > +; CHECK-NEXT: ret
> > +  %tmp = load i8** %out, align 8
> > +  %ptr = getelementptr inbounds i8* %tmp, i64 2
> > +  %trunc = trunc i64 %spacing to i8
> > +  store i8 %trunc, i8* %ptr, align 4
> > +  ret i8 *%ptr
> > +}
> > +
> > +;-----
> > +; Pre-indexed loads
> > +;-----
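> > +; Here the base update happens before the access: "ldr d0, [x0, #8]!"
> > +; adds the offset to x0 first, so the load and the returned pointer share
> > +; a single instruction.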
> > +define double* @preidxf64(double* %src, double* %out) {
> > +; CHECK-LABEL: preidxf64:
> > +; CHECK: ldr     d0, [x0, #8]!
> > +; CHECK: str     d0, [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds double* %src, i64 1
> > +  %tmp = load double* %ptr, align 4
> > +  store double %tmp, double* %out, align 4
> > +  ret double* %ptr
> > +}
> > +
> > +define float* @preidxf32(float* %src, float* %out) {
> > +; CHECK-LABEL: preidxf32:
> > +; CHECK: ldr     s0, [x0, #4]!
> > +; CHECK: str     s0, [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds float* %src, i64 1
> > +  %tmp = load float* %ptr, align 4
> > +  store float %tmp, float* %out, align 4
> > +  ret float* %ptr
> > +}
> > +
> > +define i64* @preidx64(i64* %src, i64* %out) {
> > +; CHECK-LABEL: preidx64:
> > +; CHECK: ldr     x[[REG:[0-9]+]], [x0, #8]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i64* %src, i64 1
> > +  %tmp = load i64* %ptr, align 4
> > +  store i64 %tmp, i64* %out, align 4
> > +  ret i64* %ptr
> > +}
> > +
> > +define i32* @preidx32(i32* %src, i32* %out) {
> > +; CHECK: ldr     w[[REG:[0-9]+]], [x0, #4]!
> > +; CHECK: str     w[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i32* %src, i64 1
> > +  %tmp = load i32* %ptr, align 4
> > +  store i32 %tmp, i32* %out, align 4
> > +  ret i32* %ptr
> > +}
> > +
> > +define i16* @preidx16zext32(i16* %src, i32* %out) {
> > +; CHECK: ldrh    w[[REG:[0-9]+]], [x0, #2]!
> > +; CHECK: str     w[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i16* %src, i64 1
> > +  %tmp = load i16* %ptr, align 4
> > +  %ext = zext i16 %tmp to i32
> > +  store i32 %ext, i32* %out, align 4
> > +  ret i16* %ptr
> > +}
> > +
> > +define i16* @preidx16zext64(i16* %src, i64* %out) {
> > +; CHECK: ldrh    w[[REG:[0-9]+]], [x0, #2]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i16* %src, i64 1
> > +  %tmp = load i16* %ptr, align 4
> > +  %ext = zext i16 %tmp to i64
> > +  store i64 %ext, i64* %out, align 4
> > +  ret i16* %ptr
> > +}
> > +
> > +define i8* @preidx8zext32(i8* %src, i32* %out) {
> > +; CHECK: ldrb    w[[REG:[0-9]+]], [x0, #1]!
> > +; CHECK: str     w[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i8* %src, i64 1
> > +  %tmp = load i8* %ptr, align 4
> > +  %ext = zext i8 %tmp to i32
> > +  store i32 %ext, i32* %out, align 4
> > +  ret i8* %ptr
> > +}
> > +
> > +define i8* @preidx8zext64(i8* %src, i64* %out) {
> > +; CHECK: ldrb    w[[REG:[0-9]+]], [x0, #1]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i8* %src, i64 1
> > +  %tmp = load i8* %ptr, align 4
> > +  %ext = zext i8 %tmp to i64
> > +  store i64 %ext, i64* %out, align 4
> > +  ret i8* %ptr
> > +}
> > +
> > +define i32* @preidx32sext64(i32* %src, i64* %out) {
> > +; CHECK: ldrsw   x[[REG:[0-9]+]], [x0, #4]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i32* %src, i64 1
> > +  %tmp = load i32* %ptr, align 4
> > +  %ext = sext i32 %tmp to i64
> > +  store i64 %ext, i64* %out, align 8
> > +  ret i32* %ptr
> > +}
> > +
> > +define i16* @preidx16sext32(i16* %src, i32* %out) {
> > +; CHECK: ldrsh   w[[REG:[0-9]+]], [x0, #2]!
> > +; CHECK: str     w[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i16* %src, i64 1
> > +  %tmp = load i16* %ptr, align 4
> > +  %ext = sext i16 %tmp to i32
> > +  store i32 %ext, i32* %out, align 4
> > +  ret i16* %ptr
> > +}
> > +
> > +define i16* @preidx16sext64(i16* %src, i64* %out) {
> > +; CHECK: ldrsh   x[[REG:[0-9]+]], [x0, #2]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i16* %src, i64 1
> > +  %tmp = load i16* %ptr, align 4
> > +  %ext = sext i16 %tmp to i64
> > +  store i64 %ext, i64* %out, align 4
> > +  ret i16* %ptr
> > +}
> > +
> > +define i8* @preidx8sext32(i8* %src, i32* %out) {
> > +; CHECK: ldrsb   w[[REG:[0-9]+]], [x0, #1]!
> > +; CHECK: str     w[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i8* %src, i64 1
> > +  %tmp = load i8* %ptr, align 4
> > +  %ext = sext i8 %tmp to i32
> > +  store i32 %ext, i32* %out, align 4
> > +  ret i8* %ptr
> > +}
> > +
> > +define i8* @preidx8sext64(i8* %src, i64* %out) {
> > +; CHECK: ldrsb   x[[REG:[0-9]+]], [x0, #1]!
> > +; CHECK: str     x[[REG]], [x1]
> > +; CHECK: ret
> > +  %ptr = getelementptr inbounds i8* %src, i64 1
> > +  %tmp = load i8* %ptr, align 4
> > +  %ext = sext i8 %tmp to i64
> > +  store i64 %ext, i64* %out, align 4
> > +  ret i8* %ptr
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-I.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-I.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-I.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-I.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'I'
> > +
> > +define i32 @constraint_I(i32 %i, i32 %j) nounwind ssp {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "add $0, $1, $2", "=r,r,I"(i32 %i,
> i32 4097) nounwind
> > +  ret i32 %0
> > +}
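For reference, the 'I' constraint takes constants that are valid AArch64 ADD immediates (a 12-bit value, optionally shifted left by 12, as the valid cases in inline-asm.ll below use). A rough C-level sketch of the same situation, illustrative only and not part of the patch:

  /* Illustrative only: the constant must be encodable as an ADD immediate
     for 'I' to be satisfied. */
  int add_imm(int i) {
    int r;
    __asm__("add %w0, %w1, %2" : "=r"(r) : "r"(i), "I"(4096)); /* 1 << 12: accepted */
    /* "I"(4097) is neither a plain 12-bit value nor a shifted one, so it
       would produce the diagnostic this test checks for. */
    return r;
  }
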
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-J.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-J.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-J.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-J.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'J'
> > +
> > +define i32 @constraint_J(i32 %i, i32 %j) nounwind ssp {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "sub $0, $1, $2", "=r,r,J"(i32 %i,
> i32 2) nounwind
> > +  ret i32 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-K.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-K.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-K.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-K.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'K'
> > +
> > +define i32 @constraint_K(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "eor $0, $1, $2", "=r,r,K"(i32 %i,
> i32 -1) nounwind
> > +  ret i32 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-L.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-L.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-L.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-L.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'L'
> > +
> > +define i32 @constraint_L(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "eor $0, $1, $2", "=r,r,L"(i32 %i,
> i64 -1) nounwind
> > +  ret i32 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-M.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-M.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-M.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-M.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'M'
> > +
> > +define i32 @constraint_M(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "movk $0, $1", "=r,M"(i32
> 305418240) nounwind
> > +  ret i32 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-error-N.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-error-N.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-error-N.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-error-N.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc -march=arm64 < %s  2> %t
> > +; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
> > +
> > +; Check for at least one invalid constant.
> > +; CHECK-ERRORS:        error: invalid operand for inline asm constraint
> 'N'
> > +
> > +define i32 @constraint_N(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  %0 = tail call i32 asm sideeffect "movk $0, $1", "=r,N"(i64
> 1311761352401879040) nounwind
> > +  ret i32 %0
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: not llc < %s -march=arm64 2>&1 | FileCheck %s
> > +
> > +
> > +; The 'z' constraint allocates either xzr or wzr, but obviously an
> input of 1 is
> > +; incompatible.
> > +define void @test_bad_zero_reg() {
> > +  tail call void asm sideeffect "USE($0)", "z"(i32 1) nounwind
> > +; CHECK: error: invalid operand for inline asm constraint 'z'
> > +
> > +  ret void
> > +}
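A rough C-level sketch of the same thing, assuming the front end forwards the 'z' constraint letter to the backend unchanged (illustrative, not part of the patch): since 'z' can only ever be satisfied by wzr/xzr, the operand has to be a constant zero, and any nonzero value hits the error checked above.

  /* Illustrative sketch: 'z' names the zero register, so only 0 is accepted. */
  void store_zero(int *p) {
    __asm__ volatile("str %w1, [%0]" : : "r"(p), "z"(0) : "memory");
  }
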
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/inline-asm.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/inline-asm.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/inline-asm.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/inline-asm.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,230 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -no-integrated-as
> | FileCheck %s
> > +
> > +; rdar://9167275
> > +
> > +define i32 @t1() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: mov {{w[0-9]+}}, 7
> > +  %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind
> > +  ret i32 %0
> > +}
> > +
> > +define i64 @t2() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: mov {{x[0-9]+}}, 7
> > +  %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind
> > +  ret i64 %0
> > +}
> > +
> > +define i64 @t3() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: mov {{w[0-9]+}}, 7
> > +  %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind
> > +  ret i64 %0
> > +}
> > +
> > +; rdar://9281206
> > +
> > +define void @t4(i64 %op) nounwind {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: mov x0, {{x[0-9]+}}; svc #0
> > +  %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;",
> "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind
> > +  ret void
> > +}
> > +
> > +; rdar://9394290
> > +
> > +define float @t5(float %x) nounwind {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
> > +  %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float
> %x) nounwind
> > +  ret float %0
> > +}
> > +
> > +; rdar://9553599
> > +
> > +define zeroext i8 @t6(i8* %src) nounwind {
> > +entry:
> > +; CHECK-LABEL: t6:
> > +; CHECK: ldtrb {{w[0-9]+}}, [{{x[0-9]+}}]
> > +  %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind
> > +  ret i8 %0
> > +}
> > +
> > +define void @t7(i8* %f, i32 %g) nounwind {
> > +entry:
> > +  %f.addr = alloca i8*, align 8
> > +  store i8* %f, i8** %f.addr, align 8
> > +  ; CHECK-LABEL: t7:
> > +  ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}]
> > +  call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
> > +  ret void
> > +}
> > +
> > +; rdar://10258229
> > +; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize
> 'v'
> > +; registers.
> > +define void @t8() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t8:
> > +; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16]
> > +  tail call void asm sideeffect "nop", "~{v8}"() nounwind
> > +  ret void
> > +}
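The stp of two d registers in the check above is the callee-saved spill that the v8 clobber forces (d8-d15 are callee-saved under AAPCS64). A hedged C-level sketch of the same test, illustrative only:

  /* Illustrative sketch: naming v8 in the clobber list forces the prologue
     to save/restore the callee-saved d8, which the stp check matches. */
  void clobber_v8(void) {
    __asm__ volatile("nop" : : : "v8");
  }
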
> > +
> > +define i32 @constraint_I(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  ; CHECK-LABEL: constraint_I:
> > +  %0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2",
> "=r,r,I"(i32 %i, i32 16773120) nounwind
> > +  ; CHECK: add   {{w[0-9]+}}, {{w[0-9]+}}, #16773120
> > +  %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2",
> "=r,r,I"(i32 %i, i32 4096) nounwind
> > +  ; CHECK: add   {{w[0-9]+}}, {{w[0-9]+}}, #4096
> > +  ret i32 %1
> > +}
> > +
> > +define i32 @constraint_J(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  ; CHECK-LABEL: constraint_J:
> > +  %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2",
> "=r,r,J"(i32 %i, i32 -16773120) nounwind
> > +  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #4278194176
> > +  %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2",
> "=r,r,J"(i32 %i, i32 -1) nounwind
> > +  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #4294967295
> > +  ret i32 %1
> > +}
> > +
> > +define i32 @constraint_KL(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  ; CHECK-LABEL: constraint_KL:
> > +  %0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2",
> "=r,r,K"(i32 %i, i32 255) nounwind
> > +  ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #255
> > +  %1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2",
> "=r,r,L"(i32 %i, i64 16711680) nounwind
> > +  ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #16711680
> > +  ret i32 %1
> > +}
> > +
> > +define i32 @constraint_MN(i32 %i, i32 %j) nounwind {
> > +entry:
> > +  ; CHECK-LABEL: constraint_MN:
> > +  %0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32
> 65535) nounwind
> > +  ; CHECK: movk  {{w[0-9]+}}, #65535
> > +  %1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0)
> nounwind
> > +  ; CHECK: movz  {{w[0-9]+}}, #0
> > +  ret i32 %1
> > +}
> > +
> > +define void @t9() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t9:
> > +  %data = alloca <2 x double>, align 16
> > +  %0 = load <2 x double>* %data, align 16
> > +  call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double>
> %0) nounwind
> > +  ; CHECK: mov.2d v4, {{v[0-9]+}}
> > +  ret void
> > +}
> > +
> > +define void @t10() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t10:
> > +  %data = alloca <2 x float>, align 8
> > +  %a = alloca [2 x float], align 4
> > +  %arraydecay = getelementptr inbounds [2 x float]* %a, i32 0, i32 0
> > +  %0 = load <2 x float>* %data, align 8
> > +  call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float*
> %arraydecay, <2 x float> %0) nounwind
> > +  ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
> > +  call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float*
> %arraydecay, <2 x float> %0) nounwind
> > +  ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
> > +  call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float*
> %arraydecay, <2 x float> %0) nounwind
> > +  ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}]
> > +  call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float*
> %arraydecay, <2 x float> %0) nounwind
> > +  ; CHECK: ldr {{h[0-9]+}}, [{{x[0-9]+}}]
> > +  call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float*
> %arraydecay, <2 x float> %0) nounwind
> > +  ; CHECK: ldr {{b[0-9]+}}, [{{x[0-9]+}}]
> > +  ret void
> > +}
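At the C level the ${1:q}/${1:d}/${1:s}/${1:h}/${1:b} forms in t10 correspond to the %q/%d/%s/%h/%b operand modifiers, which select the 128/64/32/16/8-bit views of the same SIMD register. A minimal sketch, assuming a clang arm64 target that forwards these modifiers to the backend (illustrative only):

  #include <arm_neon.h>

  /* Illustrative only: same ldr-with-width-modifier pattern as t10. */
  void load_views(float *p, float32x2_t v) {
    __asm__ volatile("ldr %q1, [%0]" : : "r"(p), "w"(v));
    __asm__ volatile("ldr %d1, [%0]" : : "r"(p), "w"(v));
  }
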
> > +
> > +define void @t11() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t11:
> > +  %a = alloca i32, align 4
> > +  %0 = load i32* %a, align 4
> > +  call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32
> 0) nounwind
> > +  ; CHECK: mov xzr, {{x[0-9]+}}
> > +  %1 = load i32* %a, align 4
> > +  call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32
> 0) nounwind
> > +  ; CHECK: mov wzr, {{w[0-9]+}}
> > +  ret void
> > +}
> > +
> > +define void @t12() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t12:
> > +  %data = alloca <4 x float>, align 16
> > +  %0 = load <4 x float>* %data, align 16
> > +  call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float>
> %0) nounwind
> > +  ; CHECK mov.2d v4, {{v([0-9])|(1[0-5])}}
> > +  ret void
> > +}
> > +
> > +define void @t13() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t13:
> > +  tail call void asm sideeffect "mov x4, $0\0A", "N"(i64
> 1311673391471656960) nounwind
> > +  ; CHECK: mov x4, #1311673391471656960
> > +  tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind
> > +  ; CHECK: mov x4, #-4662
> > +  tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind
> > +  ; CHECK: mov x4, #4660
> > +  call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696)
> nounwind
> > +  ; CHECK: mov x4, #-71777214294589696
> > +  ret void
> > +}
> > +
> > +define void @t14() nounwind {
> > +entry:
> > +  ; CHECK-LABEL: t14:
> > +  tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760)
> nounwind
> > +  ; CHECK: mov w4, #305397760
> > +  tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind
> > +  ; CHECK: mov w4, #4294962634
> > +  tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind
> > +  ; CHECK: mov w4, #4660
> > +  call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind
> > +  ; CHECK: mov w4, #4278255360
> > +  ret void
> > +}
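Judging from t13/t14 and the corresponding error tests, 'M' and 'N' accept 32- and 64-bit constants that a single MOV alias (MOVZ, MOVN or a bitmask immediate) can materialize; 0x12345000 in the error test needs two instructions and is rejected. A small sketch, assuming the constraint letter is forwarded from C unchanged (illustrative only):

  /* Illustrative sketch: 0x12340000 is 0x1234 << 16, a single MOVZ. */
  unsigned single_mov(void) {
    unsigned r;
    __asm__("mov %w0, %1" : "=r"(r) : "M"(0x12340000u));
    return r;
  }
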
> > +
> > +define void @t15() nounwind {
> > +entry:
> > +  %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind
> > +  ; CHECK: fmov {{x[0-9]+}}, d8
> > +  ret void
> > +}
> > +
> > +; rdar://problem/14285178
> > +
> > +define void @test_zero_reg(i32* %addr) {
> > +; CHECK-LABEL: test_zero_reg:
> > +
> > +  tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind
> > +; CHECK: USE(xzr)
> > +
> > +  tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0)
> > +; CHECK: USE(wzr)
> > +
> > +  tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1)
> > +; CHECK: orr [[VAL1:w[0-9]+]], wzr, #0x1
> > +; CHECK: USE([[VAL1]])
> > +
> > +  tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0)
> nounwind
> > +; CHECK: USE(xzr), USE(xzr)
> > +
> > +  tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0,
> i32 0) nounwind
> > +; CHECK: USE(xzr), USE(wzr)
> > +
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/join-reserved.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/join-reserved.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/join-reserved.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/join-reserved.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc < %s -verify-machineinstrs | FileCheck %s
> > +target triple = "arm64-apple-macosx10"
> > +
> > +; Make sure that a store to [sp] addresses off sp directly.
> > +; A move isn't necessary.
> > +; <rdar://problem/11492712>
> > +; CHECK-LABEL: g:
> > +; CHECK: str xzr, [sp]
> > +; CHECK: bl
> > +; CHECK: ret
> > +define void @g() nounwind ssp {
> > +entry:
> > +  tail call void (i32, ...)* @f(i32 0, i32 0) nounwind
> > +  ret void
> > +}
> > +
> > +declare void @f(i32, ...)
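A C-level equivalent of this test, for reference (illustrative only): the anonymous variadic argument is passed on the stack on Darwin, and a zero should be stored there as xzr directly, with no intermediate register move.

  /* Illustrative C equivalent of the IR above. */
  void f(int, ...);

  void g(void) {
    f(0, 0);
  }
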
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/jumptable.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/jumptable.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/jumptable.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/jumptable.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,35 @@
> > +; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s
> --check-prefix=CHECK-LINUX
> > +; <rdar://11417675>
> > +
> > +define void @sum(i32* %to) {
> > +entry:
> > +  switch i32 undef, label %exit [
> > +    i32 1, label %bb1
> > +    i32 2, label %bb2
> > +    i32 3, label %bb3
> > +    i32 4, label %bb4
> > +  ]
> > +bb1:
> > +  store i32 undef, i32* %to
> > +  br label %exit
> > +bb2:
> > +  store i32 undef, i32* %to
> > +  br label %exit
> > +bb3:
> > +  store i32 undef, i32* %to
> > +  br label %exit
> > +bb4:
> > +  store i32 undef, i32* %to
> > +  br label %exit
> > +exit:
> > +  ret void
> > +}
> > +
> > +; CHECK-LABEL: sum:
> > +; CHECK: adrp    {{x[0-9]+}}, LJTI0_0@PAGE
> > +; CHECK:  add    {{x[0-9]+}}, {{x[0-9]+}}, LJTI0_0@PAGEOFF
> > +
> > +; CHECK-LINUX-LABEL: sum:
> > +; CHECK-LINUX: adrp    {{x[0-9]+}}, .LJTI0_0
> > +; CHECK-LINUX:  add    {{x[0-9]+}}, {{x[0-9]+}}, :lo12:.LJTI0_0
> >
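A C-level sketch of the switch in this test, illustrative only; whether a given switch actually becomes a jump table still depends on the case-density heuristics, but when it does, the table address is formed with adrp/add against LJTI... on MachO and adrp plus :lo12: on ELF, as the checks above expect.

  /* Illustrative C equivalent of the switch in jumptable.ll. */
  void sum(int *to, int n) {
    switch (n) {
    case 1: *to = 1; break;
    case 2: *to = 2; break;
    case 3: *to = 3; break;
    case 4: *to = 4; break;
    }
  }
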
> > Added: llvm/trunk/test/CodeGen/ARM64/ld1.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ld1.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ld1.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ld1.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,1254 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -verify-machineinstrs | FileCheck %s
> > +
> > +%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
> > +%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
> > +%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>, <8 x i8>,  <8 x
> i8> }
> > +
> > +define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
> > +; CHECK: ld2_8b
> > +; Make sure we are loading into the results defined by the ABI (i.e.,
> v0, v1)
> > +; and from the argument of the function also defined by ABI (i.e., x0)
> > +; CHECK ld2.8b { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x2_t
> @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
> > +; CHECK: ld3_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.8b { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x3_t
> @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
> > +; CHECK: ld4_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.8b { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x4_t
> @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*)
> nounwind readonly
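These ldN tests correspond to the vldN NEON intrinsics. A minimal sketch of the kind of source that should reach the ld2 intrinsic above, assuming <arm_neon.h> and a clang arm64 target (illustrative, not part of the patch):

  #include <arm_neon.h>

  /* Illustrative only: vld2_s8 should lower to the llvm.arm64.neon.ld2.v8i8
     intrinsic and select to "ld2.8b { v0, v1 }, [x0]" in the ABI registers. */
  int8x8x2_t load_pair(const int8_t *p) {
    return vld2_s8(p);
  }
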
> > +
> > +%struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> }
> > +%struct.__neon_int8x16x3_t = type { <16 x i8>,  <16 x i8>,  <16 x i8> }
> > +%struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>, <16 x i8>,
>  <16 x i8> }
> > +
> > +define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
> > +; CHECK: ld2_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.16b { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
> > +  ret %struct.__neon_int8x16x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
> > +; CHECK: ld3_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.16b { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A)
> > +  ret %struct.__neon_int8x16x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
> > +; CHECK: ld4_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.16b { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A)
> > +  ret %struct.__neon_int8x16x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*)
> nounwind readonly
> > +
> > +%struct.__neon_int16x4x2_t = type { <4 x i16>,  <4 x i16> }
> > +%struct.__neon_int16x4x3_t = type { <4 x i16>,  <4 x i16>,  <4 x i16> }
> > +%struct.__neon_int16x4x4_t = type { <4 x i16>,  <4 x i16>, <4 x i16>,
>  <4 x i16> }
> > +
> > +define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
> > +; CHECK: ld2_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.4h { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
> > +; CHECK: ld3_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.4h { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
> > +; CHECK: ld4_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.4h { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly
> > +
> > +%struct.__neon_int16x8x2_t = type { <8 x i16>,  <8 x i16> }
> > +%struct.__neon_int16x8x3_t = type { <8 x i16>,  <8 x i16>,  <8 x i16> }
> > +%struct.__neon_int16x8x4_t = type { <8 x i16>,  <8 x i16>, <8 x i16>,
>  <8 x i16> }
> > +
> > +define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
> > +; CHECK: ld2_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.8h { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
> > +; CHECK: ld3_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.8h { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x3_t %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
> > +; CHECK: ld4_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.8h { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly
> > +
> > +%struct.__neon_int32x2x2_t = type { <2 x i32>,  <2 x i32> }
> > +%struct.__neon_int32x2x3_t = type { <2 x i32>,  <2 x i32>,  <2 x i32> }
> > +%struct.__neon_int32x2x4_t = type { <2 x i32>,  <2 x i32>, <2 x i32>,
>  <2 x i32> }
> > +
> > +define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
> > +; CHECK: ld2_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.2s { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
> > +; CHECK: ld3_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.2s { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
> > +; CHECK: ld4_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.2s { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly
> > +
> > +%struct.__neon_int32x4x2_t = type { <4 x i32>,  <4 x i32> }
> > +%struct.__neon_int32x4x3_t = type { <4 x i32>,  <4 x i32>,  <4 x i32> }
> > +%struct.__neon_int32x4x4_t = type { <4 x i32>,  <4 x i32>, <4 x i32>,
>  <4 x i32> }
> > +
> > +define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
> > +; CHECK: ld2_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.4s { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
> > +; CHECK: ld3_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.4s { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
> > +; CHECK: ld4_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.4s { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly
> > +
> > +%struct.__neon_int64x2x2_t = type { <2 x i64>,  <2 x i64> }
> > +%struct.__neon_int64x2x3_t = type { <2 x i64>,  <2 x i64>,  <2 x i64> }
> > +%struct.__neon_int64x2x4_t = type { <2 x i64>,  <2 x i64>, <2 x i64>,
>  <2 x i64> }
> > +
> > +define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
> > +; CHECK: ld2_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2.2d { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
> > +; CHECK: ld3_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3.2d { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
> > +; CHECK: ld4_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4.2d { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly
> > +
> > +%struct.__neon_int64x1x2_t = type { <1 x i64>,  <1 x i64> }
> > +%struct.__neon_int64x1x3_t = type { <1 x i64>,  <1 x i64>, <1 x i64> }
> > +%struct.__neon_int64x1x4_t = type { <1 x i64>,  <1 x i64>, <1 x i64>,
> <1 x i64> }
> > +
> > +
> > +define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
> > +; CHECK: ld2_1di64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x1x2_t
> @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x1x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
> > +; CHECK: ld3_1di64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x1x3_t
> @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x1x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
> > +; CHECK: ld4_1di64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x1x4_t
> @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x1x4_t  %tmp2
> > +}
> > +
> > +
> > +declare %struct.__neon_int64x1x2_t
> @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x1x3_t
> @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x1x4_t
> @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly
> > +
> > +%struct.__neon_float64x1x2_t = type { <1 x double>,  <1 x double> }
> > +%struct.__neon_float64x1x3_t = type { <1 x double>,  <1 x double>, <1 x
> double> }
> > +%struct.__neon_float64x1x4_t = type { <1 x double>,  <1 x double>, <1 x
> double>, <1 x double> }
> > +
> > +
> > +define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
> > +; CHECK: ld2_1df64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_float64x1x2_t
> @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A)
> > +       ret %struct.__neon_float64x1x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
> > +; CHECK: ld3_1df64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_float64x1x3_t
> @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A)
> > +       ret %struct.__neon_float64x1x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
> > +; CHECK: ld4_1df64
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_float64x1x4_t
> @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A)
> > +       ret %struct.__neon_float64x1x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_float64x1x2_t
> @llvm.arm64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
> > +declare %struct.__neon_float64x1x3_t
> @llvm.arm64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
> > +declare %struct.__neon_float64x1x4_t
> @llvm.arm64.neon.ld4.v1f64.p0f64(double*) nounwind readonly
> > +
> > +
> > +define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8>
> %L2, i8* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld2lane_16b
> > +; CHECK ld2.b { v0, v1 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1,
> i8* %A)
> > +       ret %struct.__neon_int8x16x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8>
> %L2, <16 x i8> %L3, i8* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld3lane_16b
> > +; CHECK ld3.b { v0, v1, v2 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8>
> %L3, i64 1, i8* %A)
> > +       ret %struct.__neon_int8x16x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8>
> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld4lane_16b
> > +; CHECK ld4.b { v0, v1, v2, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8>
> %L3, <16 x i8> %L4, i64 1, i8* %A)
> > +       ret %struct.__neon_int8x16x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64,
> i8*) nounwind readonly
> > +declare %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x
> i8>, i64, i8*) nounwind readonly
> > +
> > +define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16>
> %L2, i16* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld2lane_8h
> > +; CHECK ld2.h { v0, v1 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1,
> i16* %A)
> > +       ret %struct.__neon_int16x8x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16>
> %L2, <8 x i16> %L3, i16* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld3lane_8h
> > +; CHECK ld3.h { v0, v1, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x
> i16> %L3, i64 1, i16* %A)
> > +       ret %struct.__neon_int16x8x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16>
> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld4lane_8h
> > +; CHECK ld4.h { v0, v1, v2, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x
> i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
> > +       ret %struct.__neon_int16x8x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)
> nounwind readonly
> > +declare %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64,
> i16*) nounwind readonly
> > +declare %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x
> i16>, i64, i16*) nounwind readonly
> > +
> > +define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32>
> %L2, i32* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld2lane_4s
> > +; CHECK ld2.s { v0, v1 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1,
> i32* %A)
> > +       ret %struct.__neon_int32x4x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32>
> %L2, <4 x i32> %L3, i32* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld3lane_4s
> > +; CHECK ld3.s { v0, v1, v2 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x
> i32> %L3, i64 1, i32* %A)
> > +       ret %struct.__neon_int32x4x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32>
> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld4lane_4s
> > +; CHECK ld4.s { v0, v1, v2, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x
> i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
> > +       ret %struct.__neon_int32x4x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
> nounwind readonly
> > +declare %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64,
> i32*) nounwind readonly
> > +declare %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x
> i32>, i64, i32*) nounwind readonly
> > +
> > +define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64>
> %L2, i64* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld2lane_2d
> > +; CHECK ld2.d { v0, v1 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1,
> i64* %A)
> > +       ret %struct.__neon_int64x2x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64>
> %L2, <2 x i64> %L3, i64* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld3lane_2d
> > +; CHECK ld3.d { v0, v1, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x
> i64> %L3, i64 1, i64* %A)
> > +       ret %struct.__neon_int64x2x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64>
> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld4lane_2d
> > +; CHECK ld4.d { v0, v1, v2, v3 }[1], [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x
> i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
> > +       ret %struct.__neon_int64x2x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)
> nounwind readonly
> > +declare %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64,
> i64*) nounwind readonly
> > +declare %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x
> i64>, i64, i64*) nounwind readonly
> > +
> > +define <8 x i8> @ld1r_8b(i8* %bar) {
> > +; CHECK: ld1r_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.8b { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i8* %bar
> > +  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8
> undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
> > +  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
> > +  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
> > +  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
> > +  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
> > +  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
> > +  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
> > +  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
> > +  ret <8 x i8> %tmp9
> > +}
> > +
> > +define <16 x i8> @ld1r_16b(i8* %bar) {
> > +; CHECK: ld1r_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.16b { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i8* %bar
> > +  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8
> undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8
> undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
> > +  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
> > +  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
> > +  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
> > +  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
> > +  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
> > +  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
> > +  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
> > +  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
> > +  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
> > +  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
> > +  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
> > +  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
> > +  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
> > +  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
> > +  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
> > +  ret <16 x i8> %tmp17
> > +}
> > +
> > +define <4 x i16> @ld1r_4h(i16* %bar) {
> > +; CHECK: ld1r_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.4h { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i16* %bar
> > +  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16
> undef>, i16 %tmp1, i32 0
> > +  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
> > +  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
> > +  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
> > +  ret <4 x i16> %tmp5
> > +}
> > +
> > +define <8 x i16> @ld1r_8h(i16* %bar) {
> > +; CHECK: ld1r_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.8h { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i16* %bar
> > +  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16
> undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
> > +  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
> > +  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
> > +  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
> > +  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
> > +  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
> > +  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
> > +  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
> > +  ret <8 x i16> %tmp9
> > +}
> > +
> > +define <2 x i32> @ld1r_2s(i32* %bar) {
> > +; CHECK: ld1r_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2s { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i32* %bar
> > +  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1,
> i32 0
> > +  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
> > +  ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @ld1r_4s(i32* %bar) {
> > +; CHECK: ld1r_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.4s { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i32* %bar
> > +  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32
> undef>, i32 %tmp1, i32 0
> > +  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
> > +  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
> > +  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
> > +  ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @ld1r_2d(i64* %bar) {
> > +; CHECK: ld1r_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2d { v0 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp1 = load i64* %bar
> > +  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1,
> i32 0
> > +  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
> > +  ret <2 x i64> %tmp3
> > +}
> > +
> > +define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
> > +; CHECK: ld2r_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.8b { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x2_t
> @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
> > +; CHECK: ld3r_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.8b { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x3_t
> @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
> > +; CHECK: ld4r_8b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.8b { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x8x4_t
> @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x8x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*)
> nounwind readonly
> > +
> > +define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
> > +; CHECK: ld2r_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.16b { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x16x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
> > +; CHECK: ld3r_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.16b { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x16x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
> > +; CHECK: ld4r_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.16b { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A)
> > +       ret %struct.__neon_int8x16x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
> > +declare %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
> > +declare %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
> > +
> > +define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
> > +; CHECK: ld2r_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.4h { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
> > +; CHECK: ld3r_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.4h { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
> > +; CHECK: ld4r_4h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.4h { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A)
> > +       ret %struct.__neon_int16x4x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
> > +
> > +define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
> > +; CHECK: ld2r_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.8h { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
> > +; CHECK: ld3r_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.8h { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
> > +; CHECK: ld4r_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.8h { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +  %tmp2 = call %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A)
> > +  ret %struct.__neon_int16x8x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
> > +
> > +define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
> > +; CHECK: ld2r_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.2s { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
> > +; CHECK: ld3r_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.2s { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
> > +; CHECK: ld4r_2s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.2s { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x2x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
> > +
> > +define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
> > +; CHECK: ld2r_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.4s { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
> > +; CHECK: ld3r_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.4s { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
> > +; CHECK: ld4r_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.4s { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A)
> > +       ret %struct.__neon_int32x4x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
> > +
> > +define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
> > +; CHECK: ld2r_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld2r.2d { v0, v1 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x2_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
> > +; CHECK: ld3r_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld3r.2d { v0, v1, v2 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x3_t  %tmp2
> > +}
> > +
> > +define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
> > +; CHECK: ld4r_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK ld4r.2d { v0, v1, v2, v3 }, [x0]
> > +; CHECK-NEXT ret
> > +       %tmp2 = call %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A)
> > +       ret %struct.__neon_int64x2x4_t  %tmp2
> > +}
> > +
> > +declare %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
> > +
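(Aside, not part of the quoted patch: for anyone mapping these ldNr tests back
to source, the usual front-end producers would be the ACLE dup-load intrinsics.
A minimal C sketch, assuming <arm_neon.h> and an arm64 target; the instruction
comments are expectations, not verified output.)

    #include <arm_neon.h>

    /* Replicating dup-loads; these should select the ldNr forms above. */
    int32x2x2_t dup2(const int32_t *p) { return vld2_dup_s32(p); } /* ld2r.2s */
    int32x2x3_t dup3(const int32_t *p) { return vld3_dup_s32(p); } /* ld3r.2s */
    int32x2x4_t dup4(const int32_t *p) { return vld4_dup_s32(p); } /* ld4r.2s */
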
> > +define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
> > +; CHECK: ld1_16b
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1.b { v0 }[0], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp1 = load i8* %bar
> > +  %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
> > +  ret <16 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
> > +; CHECK: ld1_8h
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1.h { v0 }[0], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp1 = load i16* %bar
> > +  %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
> > +  ret <8 x i16> %tmp2
> > +}
> > +
> > +define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
> > +; CHECK: ld1_4s
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1.s { v0 }[0], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp1 = load i32* %bar
> > +  %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
> > +  ret <4 x i32> %tmp2
> > +}
> > +
> > +define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
> > +; CHECK: ld1_2d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1.d { v0 }[0], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp1 = load i64* %bar
> > +  %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
> > +  ret <2 x i64> %tmp2
> > +}
> > +
> > +define <1 x i64> @ld1_1d(<1 x i64>* %p) {
> > +; CHECK: ld1_1d
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ldr [[REG:d[0-9]+]], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load <1 x i64>* %p, align 8
> > +  ret <1 x i64> %tmp
> > +}
> > +
> > +
> > +; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
> > +define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16*
> nocapture %diff) nounwind ssp {
> > +entry:
> > +; CHECK: ld1r_2s_from_dup
> > +; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
> > +; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
> > +; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
> > +; CHECK-NEXT: str d[[RESREGNUM]], [x2]
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast i8* %a to i32*
> > +  %tmp1 = load i32* %tmp, align 4
> > +  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
> > +  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32>
> zeroinitializer
> > +  %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
> > +  %tmp4 = bitcast i8* %b to i32*
> > +  %tmp5 = load i32* %tmp4, align 4
> > +  %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
> > +  %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32>
> zeroinitializer
> > +  %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
> > +  %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
> > +  %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
> > +  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
> > +  %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
> > +  %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x
> i32> zeroinitializer
> > +  %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
> > +  %tmp10 = bitcast i16* %diff to <4 x i16>*
> > +  store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
> > +  ret void
> > +}
> > +
> > +; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is
> suboptimal
> > +define <4 x float> @ld1r_4s_float(float* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_4s_float
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.4s { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load float* %x, align 4
> > +  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
> > +  %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
> > +  %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
> > +  %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
> > +  ret <4 x float> %tmp4
> > +}
> > +
> > +define <2 x float> @ld1r_2s_float(float* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_2s_float
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2s { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load float* %x, align 4
> > +  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
> > +  %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
> > +  ret <2 x float> %tmp2
> > +}
> > +
> > +define <2 x double> @ld1r_2d_double(double* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_2d_double
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2d { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load double* %x, align 4
> > +  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
> > +  %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
> > +  ret <2 x double> %tmp2
> > +}
> > +
> > +define <1 x double> @ld1r_1d_double(double* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_1d_double
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ldr d0, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load double* %x, align 4
> > +  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
> > +  ret <1 x double> %tmp1
> > +}
> > +
> > +define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_4s_float_shuff
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.4s { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load float* %x, align 4
> > +  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
> > +  %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32>
> zeroinitializer
> > +  ret <4 x float> %lane
> > +}
> > +
> > +define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_2s_float_shuff
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2s { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load float* %x, align 4
> > +  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
> > +  %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32>
> zeroinitializer
> > +  ret <2 x float> %lane
> > +}
> > +
> > +define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_2d_double_shuff
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ld1r.2d { v0 }, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load double* %x, align 4
> > +  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
> > +  %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x
> i32> zeroinitializer
> > +  ret <2 x double> %lane
> > +}
> > +
> > +define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
> > +entry:
> > +; CHECK: ld1r_1d_double_shuff
> > +; Make sure we are using the operands defined by the ABI
> > +; CHECK: ldr d0, [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = load double* %x, align 4
> > +  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
> > +  %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x
> i32> zeroinitializer
> > +  ret <1 x double> %lane
> > +}
> > +
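(Aside, not from the patch: the rdar://13098923 and rdar://11947069 cases above
are the single-register splat loads. A hedged C sketch, again assuming
<arm_neon.h>; whether ld1r is actually selected is exactly what the checks pin
down.)

    #include <arm_neon.h>

    /* Splatting a scalar load; should become ld1r rather than ldr + dup. */
    float32x2_t splat_2s(const float *p) { return vld1_dup_f32(p);  } /* ld1r.2s */
    float32x4_t splat_4s(const float *p) { return vld1q_dup_f32(p); } /* ld1r.4s */
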
> > +%struct.__neon_float32x2x2_t = type { <2 x float>,  <2 x float> }
> > +%struct.__neon_float32x2x3_t = type { <2 x float>,  <2 x float>,  <2 x
> float> }
> > +%struct.__neon_float32x2x4_t = type { <2 x float>,  <2 x float>, <2 x
> float>,  <2 x float> }
> > +
> > +declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x2x2_t
> @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x1x2_t
> @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x1x2_t
> @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x2_v8i8:
> > +; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int8x8x2_t
> @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x8x2_t %val
> > +}
> > +
> > +define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x2_v4i16:
> > +; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int16x4x2_t
> @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x4x2_t %val
> > +}
> > +
> > +define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x2_v2i32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int32x2x2_t
> @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x2x2_t %val
> > +}
> > +
> > +define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x2_v2f32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float32x2x2_t
> @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x2x2_t %val
> > +}
> > +
> > +define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x2_v1i64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int64x1x2_t
> @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x1x2_t %val
> > +}
> > +
> > +define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x2_v1f64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float64x1x2_t
> @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x1x2_t %val
> > +}
> > +
> > +
> > +%struct.__neon_float32x4x2_t = type { <4 x float>,  <4 x float> }
> > +%struct.__neon_float32x4x3_t = type { <4 x float>,  <4 x float>,  <4 x
> float> }
> > +%struct.__neon_float32x4x4_t = type { <4 x float>,  <4 x float>, <4 x
> float>,  <4 x float> }
> > +
> > +%struct.__neon_float64x2x2_t = type { <2 x double>,  <2 x double> }
> > +%struct.__neon_float64x2x3_t = type { <2 x double>,  <2 x double>,  <2
> x double> }
> > +%struct.__neon_float64x2x4_t = type { <2 x double>,  <2 x double>, <2 x
> double>,  <2 x double> }
> > +
> > +declare %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
> > +declare %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x4x2_t
> @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x2x2_t
> @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x2_v16i8:
> > +; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int8x16x2_t
> @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x16x2_t %val
> > +}
> > +
> > +define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x2_v8i16:
> > +; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int16x8x2_t
> @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x8x2_t %val
> > +}
> > +
> > +define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x2_v4i32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int32x4x2_t
> @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x4x2_t %val
> > +}
> > +
> > +define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x2_v4f32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float32x4x2_t
> @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x4x2_t %val
> > +}
> > +
> > +define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x2_v2i64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int64x2x2_t
> @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x2x2_t %val
> > +}
> > +
> > +define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x2_v2f64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float64x2x2_t
> @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x2x2_t %val
> > +}
> > +
> > +declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x2x3_t
> @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x1x3_t
> @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x1x3_t
> @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x3_v8i8:
> > +; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int8x8x3_t
> @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x8x3_t %val
> > +}
> > +
> > +define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x3_v4i16:
> > +; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int16x4x3_t
> @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x4x3_t %val
> > +}
> > +
> > +define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x3_v2i32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int32x2x3_t
> @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x2x3_t %val
> > +}
> > +
> > +define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x3_v2f32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float32x2x3_t
> @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x2x3_t %val
> > +}
> > +
> > +define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x3_v1i64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int64x1x3_t
> @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x1x3_t %val
> > +}
> > +
> > +define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x3_v1f64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float64x1x3_t
> @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x1x3_t %val
> > +}
> > +
> > +declare %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
> > +declare %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x4x3_t
> @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x2x3_t
> @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x3_v16i8:
> > +; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int8x16x3_t
> @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x16x3_t %val
> > +}
> > +
> > +define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x3_v8i16:
> > +; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int16x8x3_t
> @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x8x3_t %val
> > +}
> > +
> > +define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x3_v4i32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int32x4x3_t
> @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x4x3_t %val
> > +}
> > +
> > +define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x3_v4f32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float32x4x3_t
> @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x4x3_t %val
> > +}
> > +
> > +define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x3_v2i64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_int64x2x3_t
> @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x2x3_t %val
> > +}
> > +
> > +define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x3_v2f64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  %val = call %struct.__neon_float64x2x3_t
> @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x2x3_t %val
> > +}
> > +
> > +declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*)
> nounwind readonly
> > +declare %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x2x4_t
> @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x1x4_t
> @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x1x4_t
> @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x4_v8i8:
> > +; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int8x8x4_t
> @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x8x4_t %val
> > +}
> > +
> > +define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x4_v4i16:
> > +; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int16x4x4_t
> @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x4x4_t %val
> > +}
> > +
> > +define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x4_v2i32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int32x2x4_t
> @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x2x4_t %val
> > +}
> > +
> > +define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x4_v2f32:
> > +; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_float32x2x4_t
> @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x2x4_t %val
> > +}
> > +
> > +define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x4_v1i64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int64x1x4_t
> @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x1x4_t %val
> > +}
> > +
> > +define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x4_v1f64:
> > +; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_float64x1x4_t
> @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x1x4_t %val
> > +}
> > +
> > +declare %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
> > +declare %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
> > +declare %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
> > +declare %struct.__neon_float32x4x4_t
> @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
> > +declare %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
> > +declare %struct.__neon_float64x2x4_t
> @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly
> > +
> > +define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
> > +; CHECK-LABEL: ld1_x4_v16i8:
> > +; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
> }, [x0]
> > +  %val = call %struct.__neon_int8x16x4_t
> @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %addr)
> > +  ret %struct.__neon_int8x16x4_t %val
> > +}
> > +
> > +define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
> > +; CHECK-LABEL: ld1_x4_v8i16:
> > +; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int16x8x4_t
> @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %addr)
> > +  ret %struct.__neon_int16x8x4_t %val
> > +}
> > +
> > +define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
> > +; CHECK-LABEL: ld1_x4_v4i32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int32x4x4_t
> @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %addr)
> > +  ret %struct.__neon_int32x4x4_t %val
> > +}
> > +
> > +define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
> > +; CHECK-LABEL: ld1_x4_v4f32:
> > +; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_float32x4x4_t
> @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %addr)
> > +  ret %struct.__neon_float32x4x4_t %val
> > +}
> > +
> > +define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
> > +; CHECK-LABEL: ld1_x4_v2i64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_int64x2x4_t
> @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %addr)
> > +  ret %struct.__neon_int64x2x4_t %val
> > +}
> > +
> > +define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
> > +; CHECK-LABEL: ld1_x4_v2f64:
> > +; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  %val = call %struct.__neon_float64x2x4_t
> @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %addr)
> > +  ret %struct.__neon_float64x2x4_t %val
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/ldp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ldp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ldp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ldp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,149 @@
> > +; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
> > +; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
> > +; RUN:   -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
> > +
> > +; CHECK: ldp_int
> > +; CHECK: ldp
> > +define i32 @ldp_int(i32* %p) nounwind {
> > +  %tmp = load i32* %p, align 4
> > +  %add.ptr = getelementptr inbounds i32* %p, i64 1
> > +  %tmp1 = load i32* %add.ptr, align 4
> > +  %add = add nsw i32 %tmp1, %tmp
> > +  ret i32 %add
> > +}
> > +
> > +; CHECK: ldp_long
> > +; CHECK: ldp
> > +define i64 @ldp_long(i64* %p) nounwind {
> > +  %tmp = load i64* %p, align 8
> > +  %add.ptr = getelementptr inbounds i64* %p, i64 1
> > +  %tmp1 = load i64* %add.ptr, align 8
> > +  %add = add nsw i64 %tmp1, %tmp
> > +  ret i64 %add
> > +}
> > +
> > +; CHECK: ldp_float
> > +; CHECK: ldp
> > +define float @ldp_float(float* %p) nounwind {
> > +  %tmp = load float* %p, align 4
> > +  %add.ptr = getelementptr inbounds float* %p, i64 1
> > +  %tmp1 = load float* %add.ptr, align 4
> > +  %add = fadd float %tmp, %tmp1
> > +  ret float %add
> > +}
> > +
> > +; CHECK: ldp_double
> > +; CHECK: ldp
> > +define double @ldp_double(double* %p) nounwind {
> > +  %tmp = load double* %p, align 8
> > +  %add.ptr = getelementptr inbounds double* %p, i64 1
> > +  %tmp1 = load double* %add.ptr, align 8
> > +  %add = fadd double %tmp, %tmp1
> > +  ret double %add
> > +}
> > +
> > +; Test the load/store optimizer---combine ldurs into a ldp, if
> appropriate
> > +define i32 @ldur_int(i32* %a) nounwind {
> > +; LDUR_CHK: ldur_int
> > +; LDUR_CHK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
> > +; LDUR_CHK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i32* %a, i32 -1
> > +  %tmp1 = load i32* %p1, align 2
> > +  %p2 = getelementptr inbounds i32* %a, i32 -2
> > +  %tmp2 = load i32* %p2, align 2
> > +  %tmp3 = add i32 %tmp1, %tmp2
> > +  ret i32 %tmp3
> > +}
> > +
> > +define i64 @ldur_long(i64* %a) nounwind ssp {
> > +; LDUR_CHK: ldur_long
> > +; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
> > +; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i64* %a, i64 -1
> > +  %tmp1 = load i64* %p1, align 2
> > +  %p2 = getelementptr inbounds i64* %a, i64 -2
> > +  %tmp2 = load i64* %p2, align 2
> > +  %tmp3 = add i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> > +
> > +define float @ldur_float(float* %a) {
> > +; LDUR_CHK: ldur_float
> > +; LDUR_CHK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
> > +; LDUR_CHK-NEXT: add     s{{[0-9]+}}, [[DST2]], [[DST1]]
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds float* %a, i64 -1
> > +  %tmp1 = load float* %p1, align 2
> > +  %p2 = getelementptr inbounds float* %a, i64 -2
> > +  %tmp2 = load float* %p2, align 2
> > +  %tmp3 = fadd float %tmp1, %tmp2
> > +  ret float %tmp3
> > +}
> > +
> > +define double @ldur_double(double* %a) {
> > +; LDUR_CHK: ldur_double
> > +; LDUR_CHK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
> > +; LDUR_CHK-NEXT: add     d{{[0-9]+}}, [[DST2]], [[DST1]]
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds double* %a, i64 -1
> > +  %tmp1 = load double* %p1, align 2
> > +  %p2 = getelementptr inbounds double* %a, i64 -2
> > +  %tmp2 = load double* %p2, align 2
> > +  %tmp3 = fadd double %tmp1, %tmp2
> > +  ret double %tmp3
> > +}
> > +
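(Aside, not from the patch: the ldur tests above exercise the load/store
optimizer's pairing of unscaled loads. A C sketch of the same shape; whether
the backend emits two ldur accesses or one ldp with a negative offset is what
these checks verify.)

    /* Two adjacent loads just below the base pointer; with
       -arm64-unscaled-mem-op=true these should pair into a single ldp. */
    int sum_below(const int *a) {
      return a[-1] + a[-2];   /* accesses at [x0, #-4] and [x0, #-8] */
    }
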
> > +; Now check some boundary conditions
> > +define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
> > +; LDUR_CHK: pairUpBarelyIn
> > +; LDUR_CHK-NOT: ldur
> > +; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
> > +; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i64* %a, i64 -31
> > +  %tmp1 = load i64* %p1, align 2
> > +  %p2 = getelementptr inbounds i64* %a, i64 -32
> > +  %tmp2 = load i64* %p2, align 2
> > +  %tmp3 = add i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> > +
> > +define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
> > +; LDUR_CHK: pairUpBarelyOut
> > +; LDUR_CHK-NOT: ldp
> > +; Don't be fragile about which loads or manipulations of the base
> register
> > +; are used---just check that there isn't an ldp before the add
> > +; LDUR_CHK: add
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i64* %a, i64 -32
> > +  %tmp1 = load i64* %p1, align 2
> > +  %p2 = getelementptr inbounds i64* %a, i64 -33
> > +  %tmp2 = load i64* %p2, align 2
> > +  %tmp3 = add i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> > +
> > +define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
> > +; LDUR_CHK: pairUpNotAligned
> > +; LDUR_CHK-NOT: ldp
> > +; LDUR_CHK: ldur
> > +; LDUR_CHK-NEXT: ldur
> > +; LDUR_CHK-NEXT: add
> > +; LDUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i64* %a, i64 -18
> > +  %bp1 = bitcast i64* %p1 to i8*
> > +  %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
> > +  %dp1 = bitcast i8* %bp1p1 to i64*
> > +  %tmp1 = load i64* %dp1, align 1
> > +
> > +  %p2 = getelementptr inbounds i64* %a, i64 -17
> > +  %bp2 = bitcast i64* %p2 to i8*
> > +  %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
> > +  %dp2 = bitcast i8* %bp2p1 to i64*
> > +  %tmp2 = load i64* %dp2, align 1
> > +
> > +  %tmp3 = add i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/ldur.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ldur.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ldur.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ldur.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,67 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define i64 @_f0(i64* %p) {
> > +; CHECK: f0:
> > +; CHECK: ldur x0, [x0, #-8]
> > +; CHECK-NEXT: ret
> > +  %tmp = getelementptr inbounds i64* %p, i64 -1
> > +  %ret = load i64* %tmp, align 2
> > +  ret i64 %ret
> > +}
> > +define i32 @_f1(i32* %p) {
> > +; CHECK: f1:
> > +; CHECK: ldur w0, [x0, #-4]
> > +; CHECK-NEXT: ret
> > +  %tmp = getelementptr inbounds i32* %p, i64 -1
> > +  %ret = load i32* %tmp, align 2
> > +  ret i32 %ret
> > +}
> > +define i16 @_f2(i16* %p) {
> > +; CHECK: f2:
> > +; CHECK: ldurh w0, [x0, #-2]
> > +; CHECK-NEXT: ret
> > +  %tmp = getelementptr inbounds i16* %p, i64 -1
> > +  %ret = load i16* %tmp, align 2
> > +  ret i16 %ret
> > +}
> > +define i8 @_f3(i8* %p) {
> > +; CHECK: f3:
> > +; CHECK: ldurb w0, [x0, #-1]
> > +; CHECK-NEXT: ret
> > +  %tmp = getelementptr inbounds i8* %p, i64 -1
> > +  %ret = load i8* %tmp, align 2
> > +  ret i8 %ret
> > +}
> > +
> > +define i64 @zext32(i8* %a) nounwind ssp {
> > +; CHECK-LABEL: zext32:
> > +; CHECK: ldur w0, [x0, #-12]
> > +; CHECK-NEXT: ret
> > +  %p = getelementptr inbounds i8* %a, i64 -12
> > +  %tmp1 = bitcast i8* %p to i32*
> > +  %tmp2 = load i32* %tmp1, align 4
> > +  %ret = zext i32 %tmp2 to i64
> > +
> > +  ret i64 %ret
> > +}
> > +define i64 @zext16(i8* %a) nounwind ssp {
> > +; CHECK-LABEL: zext16:
> > +; CHECK: ldurh w0, [x0, #-12]
> > +; CHECK-NEXT: ret
> > +  %p = getelementptr inbounds i8* %a, i64 -12
> > +  %tmp1 = bitcast i8* %p to i16*
> > +  %tmp2 = load i16* %tmp1, align 2
> > +  %ret = zext i16 %tmp2 to i64
> > +
> > +  ret i64 %ret
> > +}
> > +define i64 @zext8(i8* %a) nounwind ssp {
> > +; CHECK-LABEL: zext8:
> > +; CHECK: ldurb w0, [x0, #-12]
> > +; CHECK-NEXT: ret
> > +  %p = getelementptr inbounds i8* %a, i64 -12
> > +  %tmp2 = load i8* %p, align 1
> > +  %ret = zext i8 %tmp2 to i64
> > +
> > +  ret i64 %ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/ldxr-stxr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ldxr-stxr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/ldxr-stxr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/ldxr-stxr.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,143 @@
> > +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
> > +
> > +%0 = type { i64, i64 }
> > +
> > +define i128 @f0(i8* %p) nounwind readonly {
> > +; CHECK-LABEL: f0:
> > +; CHECK: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
> > +entry:
> > +  %ldrexd = tail call %0 @llvm.arm64.ldxp(i8* %p)
> > +  %0 = extractvalue %0 %ldrexd, 1
> > +  %1 = extractvalue %0 %ldrexd, 0
> > +  %2 = zext i64 %0 to i128
> > +  %3 = zext i64 %1 to i128
> > +  %shl = shl nuw i128 %2, 64
> > +  %4 = or i128 %shl, %3
> > +  ret i128 %4
> > +}
> > +
> > +define i32 @f1(i8* %ptr, i128 %val) nounwind {
> > +; CHECK-LABEL: f1:
> > +; CHECK: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
> > +entry:
> > +  %tmp4 = trunc i128 %val to i64
> > +  %tmp6 = lshr i128 %val, 64
> > +  %tmp7 = trunc i128 %tmp6 to i64
> > +  %strexd = tail call i32 @llvm.arm64.stxp(i64 %tmp4, i64 %tmp7, i8*
> %ptr)
> > +  ret i32 %strexd
> > +}
> > +
> > +declare %0 @llvm.arm64.ldxp(i8*) nounwind
> > +declare i32 @llvm.arm64.stxp(i64, i64, i8*) nounwind
> > +
> > +@var = global i64 0, align 8
> > +
> > +define void @test_load_i8(i8* %addr) {
> > +; CHECK-LABEL: test_load_i8:
> > +; CHECK: ldxrb w[[LOADVAL:[0-9]+]], [x0]
> > +; CHECK-NOT: uxtb
> > +; CHECK-NOT: and
> > +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  %val = call i64 @llvm.arm64.ldxr.p0i8(i8* %addr)
> > +  %shortval = trunc i64 %val to i8
> > +  %extval = zext i8 %shortval to i64
> > +  store i64 %extval, i64* @var, align 8
> > +  ret void
> > +}
> > +
> > +define void @test_load_i16(i16* %addr) {
> > +; CHECK-LABEL: test_load_i16:
> > +; CHECK: ldxrh w[[LOADVAL:[0-9]+]], [x0]
> > +; CHECK-NOT: uxth
> > +; CHECK-NOT: and
> > +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  %val = call i64 @llvm.arm64.ldxr.p0i16(i16* %addr)
> > +  %shortval = trunc i64 %val to i16
> > +  %extval = zext i16 %shortval to i64
> > +  store i64 %extval, i64* @var, align 8
> > +  ret void
> > +}
> > +
> > +define void @test_load_i32(i32* %addr) {
> > +; CHECK-LABEL: test_load_i32:
> > +; CHECK: ldxr w[[LOADVAL:[0-9]+]], [x0]
> > +; CHECK-NOT: uxtw
> > +; CHECK-NOT: and
> > +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  %val = call i64 @llvm.arm64.ldxr.p0i32(i32* %addr)
> > +  %shortval = trunc i64 %val to i32
> > +  %extval = zext i32 %shortval to i64
> > +  store i64 %extval, i64* @var, align 8
> > +  ret void
> > +}
> > +
> > +define void @test_load_i64(i64* %addr) {
> > +; CHECK-LABEL: test_load_i64:
> > +; CHECK: ldxr x[[LOADVAL:[0-9]+]], [x0]
> > +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  %val = call i64 @llvm.arm64.ldxr.p0i64(i64* %addr)
> > +  store i64 %val, i64* @var, align 8
> > +  ret void
> > +}
> > +
> > +
> > +declare i64 @llvm.arm64.ldxr.p0i8(i8*) nounwind
> > +declare i64 @llvm.arm64.ldxr.p0i16(i16*) nounwind
> > +declare i64 @llvm.arm64.ldxr.p0i32(i32*) nounwind
> > +declare i64 @llvm.arm64.ldxr.p0i64(i64*) nounwind
> > +
> > +define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
> > +; CHECK-LABEL: test_store_i8:
> > +; CHECK-NOT: uxtb
> > +; CHECK-NOT: and
> > +; CHECK: stxrb w0, w1, [x2]
> > +  %extval = zext i8 %val to i64
> > +  %res = call i32 @llvm.arm64.stxr.p0i8(i64 %extval, i8* %addr)
> > +  ret i32 %res
> > +}
> > +
> > +define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
> > +; CHECK-LABEL: test_store_i16:
> > +; CHECK-NOT: uxth
> > +; CHECK-NOT: and
> > +; CHECK: stxrh w0, w1, [x2]
> > +  %extval = zext i16 %val to i64
> > +  %res = call i32 @llvm.arm64.stxr.p0i16(i64 %extval, i16* %addr)
> > +  ret i32 %res
> > +}
> > +
> > +define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
> > +; CHECK-LABEL: test_store_i32:
> > +; CHECK-NOT: uxtw
> > +; CHECK-NOT: and
> > +; CHECK: stxr w0, w1, [x2]
> > +  %extval = zext i32 %val to i64
> > +  %res = call i32 @llvm.arm64.stxr.p0i32(i64 %extval, i32* %addr)
> > +  ret i32 %res
> > +}
> > +
> > +define i32 @test_store_i64(i32, i64 %val, i64* %addr) {
> > +; CHECK-LABEL: test_store_i64:
> > +; CHECK: stxr w0, x1, [x2]
> > +  %res = call i32 @llvm.arm64.stxr.p0i64(i64 %val, i64* %addr)
> > +  ret i32 %res
> > +}
> > +
> > +declare i32 @llvm.arm64.stxr.p0i8(i64, i8*) nounwind
> > +declare i32 @llvm.arm64.stxr.p0i16(i64, i16*) nounwind
> > +declare i32 @llvm.arm64.stxr.p0i32(i64, i32*) nounwind
> > +declare i32 @llvm.arm64.stxr.p0i64(i64, i64*) nounwind
> > +
> > +; CHECK: test_clear:
> > +; CHECK: clrex
> > +define void @test_clear() {
> > +  call void @llvm.arm64.clrex()
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.clrex() nounwind
> > +
> >
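(Aside, not from the patch: the llvm.arm64.ldxr/stxr intrinsics above are what
the exclusive-access builtins and, typically, C11 atomic read-modify-write
loops lower to. A hedged C sketch of one such loop:)

    #include <stdatomic.h>

    /* CAS retry loop; on arm64 this is commonly lowered to an
       ldxr / stxr sequence like the ones checked above. */
    long fetch_inc(_Atomic long *p) {
      long old = atomic_load_explicit(p, memory_order_relaxed);
      while (!atomic_compare_exchange_weak(p, &old, old + 1))
        ;                      /* on failure, old is refreshed for the retry */
      return old;              /* value before the increment */
    }
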
> > Added: llvm/trunk/test/CodeGen/ARM64/leaf-compact-unwind.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/leaf-compact-unwind.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/leaf-compact-unwind.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/leaf-compact-unwind.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,161 @@
> > +; Use the -disable-cfi flag so that we get the compact unwind info in
> the
> > +; emitted assembly. Compact unwind info is omitted when CFI directives
> > +; are emitted.
> > +;
> > +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -disable-cfi < %s |
> FileCheck %s
> > +;
> > +; rdar://13070556
> > +
> > +@bar = common global i32 0, align 4
> > +
> > +; Leaf function with no stack allocation and no saving/restoring
> > +; of non-volatile registers.
> > +define i32 @foo1(i32 %a) #0 {
> > +entry:
> > +  %add = add nsw i32 %a, 42
> > +  ret i32 %add
> > +}
> > +
> > +; Leaf function with stack allocation but no saving/restoring
> > +; of non-volatile registers.
> > +define i32 @foo2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32
> %g, i32 %h) #0 {
> > +entry:
> > +  %stack = alloca [36 x i32], align 4
> > +  br label %for.body
> > +
> > +for.body:                                         ; preds = %for.body,
> %entry
> > +  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20,
> %for.body ]
> > +  %arrayidx = getelementptr inbounds [36 x i32]* %stack, i64 0, i64
> %indvars.iv19
> > +  %0 = trunc i64 %indvars.iv19 to i32
> > +  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
> > +  %indvars.iv.next20 = add i64 %indvars.iv19, 1
> > +  %lftr.wideiv21 = trunc i64 %indvars.iv.next20 to i32
> > +  %exitcond22 = icmp eq i32 %lftr.wideiv21, 36
> > +  br i1 %exitcond22, label %for.body4, label %for.body
> > +
> > +for.body4:                                        ; preds = %for.body,
> %for.body4
> > +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 0,
> %for.body ]
> > +  %z1.016 = phi i32 [ %add, %for.body4 ], [ 0, %for.body ]
> > +  %arrayidx6 = getelementptr inbounds [36 x i32]* %stack, i64 0, i64
> %indvars.iv
> > +  %1 = load i32* %arrayidx6, align 4, !tbaa !0
> > +  %add = add nsw i32 %1, %z1.016
> > +  %indvars.iv.next = add i64 %indvars.iv, 1
> > +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> > +  %exitcond = icmp eq i32 %lftr.wideiv, 36
> > +  br i1 %exitcond, label %for.end9, label %for.body4
> > +
> > +for.end9:                                         ; preds = %for.body4
> > +  ret i32 %add
> > +}
> > +
> > +; Leaf function with no stack allocation but with saving restoring of
> > +; non-volatile registers.
> > +define i32 @foo3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32
> %g, i32 %h) #1 {
> > +entry:
> > +  %0 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %1 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %2 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %3 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %4 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %5 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %6 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %7 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %8 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %9 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %10 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %11 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %12 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %13 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %14 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %15 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %16 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %17 = load volatile i32* @bar, align 4, !tbaa !0
> > +  %factor = mul i32 %h, -2
> > +  %factor56 = mul i32 %g, -2
> > +  %factor57 = mul i32 %f, -2
> > +  %factor58 = mul i32 %e, -2
> > +  %factor59 = mul i32 %d, -2
> > +  %factor60 = mul i32 %c, -2
> > +  %factor61 = mul i32 %b, -2
> > +  %sum = add i32 %1, %0
> > +  %sum62 = add i32 %sum, %2
> > +  %sum63 = add i32 %sum62, %3
> > +  %sum64 = add i32 %sum63, %4
> > +  %sum65 = add i32 %sum64, %5
> > +  %sum66 = add i32 %sum65, %6
> > +  %sum67 = add i32 %sum66, %7
> > +  %sum68 = add i32 %sum67, %8
> > +  %sum69 = add i32 %sum68, %9
> > +  %sum70 = add i32 %sum69, %10
> > +  %sum71 = add i32 %sum70, %11
> > +  %sum72 = add i32 %sum71, %12
> > +  %sum73 = add i32 %sum72, %13
> > +  %sum74 = add i32 %sum73, %14
> > +  %sum75 = add i32 %sum74, %15
> > +  %sum76 = add i32 %sum75, %16
> > +  %sub10 = sub i32 %17, %sum76
> > +  %sub11 = add i32 %sub10, %factor
> > +  %sub12 = add i32 %sub11, %factor56
> > +  %sub13 = add i32 %sub12, %factor57
> > +  %sub14 = add i32 %sub13, %factor58
> > +  %sub15 = add i32 %sub14, %factor59
> > +  %sub16 = add i32 %sub15, %factor60
> > +  %add = add i32 %sub16, %factor61
> > +  ret i32 %add
> > +}
> > +
> > +; Leaf function with stack allocation and saving/restoring of
> non-volatile
> > +; registers.
> > +define i32 @foo4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32
> %g, i32 %h) #0 {
> > +entry:
> > +  %stack = alloca [128 x i32], align 4
> > +  %0 = zext i32 %a to i64
> > +  br label %for.body
> > +
> > +for.cond2.preheader:                              ; preds = %for.body
> > +  %1 = sext i32 %f to i64
> > +  br label %for.body4
> > +
> > +for.body:                                         ; preds = %for.body,
> %entry
> > +  %indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23,
> %for.body ]
> > +  %2 = add nsw i64 %indvars.iv22, %0
> > +  %arrayidx = getelementptr inbounds [128 x i32]* %stack, i64 0, i64
> %indvars.iv22
> > +  %3 = trunc i64 %2 to i32
> > +  store i32 %3, i32* %arrayidx, align 4, !tbaa !0
> > +  %indvars.iv.next23 = add i64 %indvars.iv22, 1
> > +  %lftr.wideiv25 = trunc i64 %indvars.iv.next23 to i32
> > +  %exitcond26 = icmp eq i32 %lftr.wideiv25, 128
> > +  br i1 %exitcond26, label %for.cond2.preheader, label %for.body
> > +
> > +for.body4:                                        ; preds = %for.body4,
> %for.cond2.preheader
> > +  %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [
> %indvars.iv.next, %for.body4 ]
> > +  %z1.018 = phi i32 [ 0, %for.cond2.preheader ], [ %add8, %for.body4 ]
> > +  %4 = add nsw i64 %indvars.iv, %1
> > +  %arrayidx7 = getelementptr inbounds [128 x i32]* %stack, i64 0, i64 %4
> > +  %5 = load i32* %arrayidx7, align 4, !tbaa !0
> > +  %add8 = add nsw i32 %5, %z1.018
> > +  %indvars.iv.next = add i64 %indvars.iv, 1
> > +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> > +  %exitcond = icmp eq i32 %lftr.wideiv, 128
> > +  br i1 %exitcond, label %for.end11, label %for.body4
> > +
> > +for.end11:                                        ; preds = %for.body4
> > +  ret i32 %add8
> > +}
> > +
> > +attributes #0 = { readnone "target-cpu"="cyclone" }
> > +attributes #1 = { "target-cpu"="cyclone" }
> > +
> > +!0 = metadata !{metadata !"int", metadata !1}
> > +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> > +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> > +
> > +; CHECK:        .section        __LD,__compact_unwind,regular,debug
> > +; CHECK:        .quad   _foo1                   ; Range Start
> > +; CHECK:        .long   33554432                ; Compact Unwind
> Encoding: 0x2000000
> > +; CHECK:        .quad   _foo2                   ; Range Start
> > +; CHECK:        .long   33591296                ; Compact Unwind
> Encoding: 0x2009000
> > +; CHECK:        .quad   _foo3                   ; Range Start
> > +; CHECK:        .long   33570831                ; Compact Unwind
> Encoding: 0x200400f
> > +; CHECK:        .quad   _foo4                   ; Range Start
> > +; CHECK:        .long   33689616                ; Compact Unwind
> Encoding: 0x2021010
> >
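(Aside, not from the patch: rough C shapes of the first two compact-unwind
cases above, as I read the comments; a sketch only, and the register pressure
that forces the callee-saved spills in foo3/foo4 is elided here.)

    /* Leaf, no stack allocation, no callee-saved registers: the case the
       test expects to encode as 0x2000000. */
    int leaf_plain(int a) { return a + 42; }

    /* Leaf with a local array on the stack but still no CSR saves. */
    int leaf_stack(void) {
      int stack[36], s = 0;
      for (int i = 0; i < 36; ++i) stack[i] = i;
      for (int i = 0; i < 36; ++i) s += stack[i];
      return s;
    }
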
> > Added: llvm/trunk/test/CodeGen/ARM64/leaf.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/leaf.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/leaf.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/leaf.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,13 @@
> > +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +; rdar://12829704
> > +
> > +define void @t8() nounwind ssp {
> > +; CHECK-LABEL: t8:
> > +; CHECK-NOT: stp       fp, lr, [sp, #-16]!
> > +; CHECK-NOT: mov       fp, sp
> > +; CHECK: nop
> > +; CHECK-NOT: mov       sp, fp
> > +; CHECK-NOT: ldp       fp, lr, [sp], #16
> > +  tail call void asm sideeffect "nop", "~{v8}"() nounwind
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/lit.local.cfg
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/lit.local.cfg?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/lit.local.cfg (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/lit.local.cfg Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,6 @@
> > +config.suffixes = ['.ll', '.c', '.cpp']
> > +
> > +targets = set(config.root.targets_to_build.split())
> > +if not 'ARM64' in targets:
> > +    config.unsupported = True
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/long-shift.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/long-shift.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/long-shift.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/long-shift.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,59 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
> > +
> > +define i128 @shl(i128 %r, i128 %s) nounwind readnone {
> > +; CHECK-LABEL: shl:
> > +; CHECK: lslv  [[XREG_0:x[0-9]+]], x1, x2
> > +; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
> > +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
> > +; CHECK-NEXT: lsrv  [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
> > +; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
> > +; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
> > +; CHECK-NEXT: lslv  [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
> > +; CHECK-NEXT: cmp   [[XREG_4]], #0
> > +; CHECK-NEXT: csel  x1, [[XREG_5]], [[XREG_6]], ge
> > +; CHECK-NEXT: lslv  [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
> > +; CHECK-NEXT: csel  x0, xzr, [[SMALLSHIFT_LO]], ge
> > +; CHECK-NEXT: ret
> > +
> > +  %shl = shl i128 %r, %s
> > +  ret i128 %shl
> > +}
> > +
> > +define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
> > +; CHECK: ashr:
> > +; CHECK: lsrv  [[XREG_0:x[0-9]+]], x0, x2
> > +; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
> > +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
> > +; CHECK-NEXT: lslv  [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
> > +; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
> > +; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
> > +; CHECK-NEXT: asrv  [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
> > +; CHECK-NEXT: cmp   [[XREG_5]], #0
> > +; CHECK-NEXT: csel  x0, [[XREG_6]], [[XREG_4]], ge
> > +; CHECK-NEXT: asrv  [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
> > +; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
> > +; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
> > +; CHECK-NEXT: ret
> > +
> > +  %shr = ashr i128 %r, %s
> > +  ret i128 %shr
> > +}
> > +
> > +define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
> > +; CHECK: lshr:
> > +; CHECK: lsrv  [[XREG_0:x[0-9]+]], x0, x2
> > +; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
> > +; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
> > +; CHECK-NEXT: lslv  [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
> > +; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
> > +; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
> > +; CHECK-NEXT: lsrv  [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
> > +; CHECK-NEXT: cmp   [[XREG_5]], #0
> > +; CHECK-NEXT: csel  x0, [[XREG_6]], [[XREG_4]], ge
> > +; CHECK-NEXT: lsrv  [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
> > +; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
> > +; CHECK-NEXT: ret
> > +
> > +  %shr = lshr i128 %r, %s
> > +  ret i128 %shr
> > +}
> >
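(Aside, not from the patch: the same expansion can be provoked from C with the
__int128 extension; a sketch, with the caveat that C leaves shifts by 128 or
more undefined while the IR above takes a full i128 shift amount.)

    /* 128-bit shifts; each lowers to the lslv/lsrv/asrv + csel
       sequences checked above. */
    unsigned __int128 shl128 (unsigned __int128 r, unsigned s) { return r << s; }
    unsigned __int128 lshr128(unsigned __int128 r, unsigned s) { return r >> s; }
    __int128          ashr128(__int128 r, unsigned s)          { return r >> s; }
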
> > Added: llvm/trunk/test/CodeGen/ARM64/memcpy-inline.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/memcpy-inline.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/memcpy-inline.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/memcpy-inline.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,112 @@
> > +; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
> > +
> > +%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
> > +
> > +@src = external global %struct.x
> > +@dst = external global %struct.x
> > +
> > +@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM,
> SOME STRING\00", align 1
> > +@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM,
> SOME STRING BLAH\00", align 1
> > +@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM,
> SOME\00", align 1
> > +@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR
>  \00", align 1
> > +@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
> > +@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00",
> align 1
> > +@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
> > +
> > +define i32 @t0() {
> > +entry:
> > +; CHECK-LABEL: t0:
> > +; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]
> > +; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]
> > +; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
> > +; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
> > +; CHECK: ldr [[REG2:x[0-9]+]],
> > +; CHECK: str [[REG2]],
> > +  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds
> (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x*
> @src, i32 0, i32 0), i32 11, i32 8, i1 false)
> > +  ret i32 0
> > +}
> > +
> > +define void @t1(i8* nocapture %C) nounwind {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
> > +; CHECK: stur [[DEST]], [x0, #15]
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
> > +; CHECK: str [[DEST]], [x0]
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr
> inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t2(i8* nocapture %C) nounwind {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: movz [[REG3:w[0-9]+]]
> > +; CHECK: movk [[REG3]],
> > +; CHECK: str [[REG3]], [x0, #32]
> > +; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
> > +; CHECK: stp [[DEST1]], [[DEST2]], [x0]
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr
> inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t3(i8* nocapture %C) nounwind {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
> > +; CHECK: str [[REG4]], [x0, #16]
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
> > +; CHECK: str [[DEST]], [x0]
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr
> inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t4(i8* nocapture %C) nounwind {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x20
> > +; CHECK: strh [[REG5]], [x0, #16]
> > +; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
> > +; CHECK: str [[REG6]], [x0]
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr
> inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t5(i8* nocapture %C) nounwind {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: strb wzr, [x0, #6]
> > +; CHECK: movz [[REG7:w[0-9]+]], #21587
> > +; CHECK: strh [[REG7]], [x0, #4]
> > +; CHECK: movz [[REG8:w[0-9]+]],
> > +; CHECK: movk [[REG8]],
> > +; CHECK: str [[REG8]], [x0]
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr
> inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t6() nounwind {
> > +entry:
> > +; CHECK-LABEL: t6:
> > +; CHECK: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
> > +; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
> > +; CHECK: ldr
> > +; CHECK: str
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512
> x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]*
> @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
> > +  ret void
> > +}
> > +
> > +%struct.Foo = type { i32, i32, i32, i32 }
> > +
> > +define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b)
> nounwind {
> > +entry:
> > +; CHECK: t7
> > +; CHECK: ldr [[REG10:q[0-9]+]], [x1]
> > +; CHECK: str [[REG10]], [x0]
> > +  %0 = bitcast %struct.Foo* %a to i8*
> > +  %1 = bitcast %struct.Foo* %b to i8*
> > +  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32
> 4, i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture,
> i32, i32, i1) nounwind
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
> i64, i32, i1) nounwind
> >
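(Aside, not from the patch: these are the small fixed-size copies that the
backend expands inline instead of calling memcpy. A C sketch of the 16-byte
case, mirroring t7:)

    #include <string.h>

    struct foo { int a, b, c, d; };          /* 16 bytes, like %struct.Foo */

    void copy16(struct foo *dst, const struct foo *src) {
      memcpy(dst, src, sizeof *src);         /* expect one q-register ldr/str pair */
    }
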
> > Added: llvm/trunk/test/CodeGen/ARM64/memset-inline.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/memset-inline.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/memset-inline.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/memset-inline.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,27 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define void @t1(i8* nocapture %c) nounwind optsize {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: str wzr, [x0, #8]
> > +; CHECK: str xzr, [x0]
> > +  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
> > +  ret void
> > +}
> > +
> > +define void @t2() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: strh wzr, [sp, #32]
> > +; CHECK: stp xzr, xzr, [sp, #16]
> > +; CHECK: str xzr, [sp, #8]
> > +  %buf = alloca [26 x i8], align 1
> > +  %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
> > +  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
> > +  call void @something(i8* %0) nounwind
> > +  ret void
> > +}
> > +
> > +declare void @something(i8*) nounwind
> > +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
> nounwind
> > +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
> nounwind
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/memset-to-bzero.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/memset-to-bzero.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/memset-to-bzero.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/memset-to-bzero.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,101 @@
> > +; RUN: llc %s -march arm64 -o - | FileCheck %s
> > +; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls
> to memset()
> > +
> > +; CHECK: @fct1
> > +; For small size (<= 256), we do not change memset to bzero.
> > +; CHECK: memset
> > +define void @fct1(i8* nocapture %ptr) {
> > +entry:
> > +  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1,
> i1 false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
> > +
> > +; CHECK: @fct2
> > +; When the size is bigger than 256, change into bzero.
> > +; CHECK: bzero
> > +define void @fct2(i8* nocapture %ptr) {
> > +entry:
> > +  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1,
> i1 false)
> > +  ret void
> > +}
> > +
> > +; CHECK: @fct3
> > +; For unknown size, change to bzero.
> > +; CHECK: bzero
> > +define void @fct3(i8* nocapture %ptr, i32 %unknown) {
> > +entry:
> > +  %conv = sext i32 %unknown to i64
> > +  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32
> 1, i1 false)
> > +  ret void
> > +}
> > +
> > +; CHECK: @fct4
> > +; Size <= 256, no change.
> > +; CHECK: memset
> > +define void @fct4(i8* %ptr) {
> > +entry:
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
> > +  ret void
> > +}
> > +
> > +declare i8* @__memset_chk(i8*, i32, i64, i64)
> > +
> > +declare i64 @llvm.objectsize.i64(i8*, i1)
> > +
> > +; CHECK: @fct5
> > +; Size > 256, change.
> > +; CHECK: bzero
> > +define void @fct5(i8* %ptr) {
> > +entry:
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
> > +  ret void
> > +}
> > +
> > +; CHECK: @fct6
> > +; Size = unknown, change.
> > +; CHECK: bzero
> > +define void @fct6(i8* %ptr, i32 %unknown) {
> > +entry:
> > +  %conv = sext i32 %unknown to i64
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
> > +  ret void
> > +}
> > +
> > +; Next functions check that memset is not turned into bzero
> > +; when the set constant is non-zero, whatever the given size.
> > +
> > +; CHECK: @fct7
> > +; memset with something that is not a zero, no change.
> > +; CHECK: memset
> > +define void @fct7(i8* %ptr) {
> > +entry:
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
> > +  ret void
> > +}
> > +
> > +; CHECK: @fct8
> > +; memset with something that is not a zero, no change.
> > +; CHECK: memset
> > +define void @fct8(i8* %ptr) {
> > +entry:
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
> > +  ret void
> > +}
> > +
> > +; CHECK: @fct9
> > +; memset with something that is not a zero, no change.
> > +; CHECK: memset
> > +define void @fct9(i8* %ptr, i32 %unknown) {
> > +entry:
> > +  %conv = sext i32 %unknown to i64
> > +  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
> > +  %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
> > +  ret void
> > +}
> >
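To summarize what this file pins down: a zero fill of more than 256 bytes, or of unknown size, is turned into a bzero() call, while smaller fills and any non-zero fill stay as memset()/__memset_chk(). A tiny Python model of that rule as I read the tests (not the backend code; the helper name is invented):

  def zero_fill_libcall(value, size):
      if value != 0:
          return "memset"              # @fct7..@fct9: a non-zero byte is never turned into bzero
      if size is None or size > 256:
          return "bzero"               # @fct2/@fct5 (size 257) and @fct3/@fct6 (unknown size)
      return "memset"                  # @fct1/@fct4: known size <= 256

  assert zero_fill_libcall(0, 256) == "memset"
  assert zero_fill_libcall(0, 257) == "bzero"
  assert zero_fill_libcall(0, None) == "bzero"
  assert zero_fill_libcall(1, 10**6) == "memset"
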
> > Added: llvm/trunk/test/CodeGen/ARM64/movi.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/movi.ll?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/movi.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/movi.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,202 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +;==--------------------------------------------------------------------------==
> > +; Tests for MOV-immediate implemented with ORR-immediate.
> > +;==--------------------------------------------------------------------------==
> > +
> > +; 64-bit immed with 32-bit pattern size, rotated by 0.
> > +define i64 @test64_32_rot0() nounwind {
> > +; CHECK: test64_32_rot0
> > +; CHECK: orr x0, xzr, #0x700000007
> > +  ret i64 30064771079
> > +}
> > +
> > +; 64-bit immed with 32-bit pattern size, rotated by 2.
> > +define i64 @test64_32_rot2() nounwind {
> > +; CHECK: test64_32_rot2
> > +; CHECK: orr x0, xzr, #0xc0000003c0000003
> > +  ret i64 13835058071388291075
> > +}
> > +
> > +; 64-bit immed with 4-bit pattern size, rotated by 3.
> > +define i64 @test64_4_rot3() nounwind {
> > +; CHECK: test64_4_rot3
> > +; CHECK: orr  x0, xzr, #0xeeeeeeeeeeeeeeee
> > +  ret i64 17216961135462248174
> > +}
> > +
> > +; 32-bit immed with 32-bit pattern size, rotated by 16.
> > +define i32 @test32_32_rot16() nounwind {
> > +; CHECK: test32_32_rot16
> > +; CHECK: orr w0, wzr, #0xff0000
> > +  ret i32 16711680
> > +}
> > +
> > +; 32-bit immed with 2-bit pattern size, rotated by 1.
> > +define i32 @test32_2_rot1() nounwind {
> > +; CHECK: test32_2_rot1
> > +; CHECK: orr w0, wzr, #0xaaaaaaaa
> > +  ret i32 2863311530
> > +}
> > +
> > +;==--------------------------------------------------------------------------==
> > +; Tests for MOVZ with MOVK.
> > +;==--------------------------------------------------------------------------==
> > +
> > +define i32 @movz() nounwind {
> > +; CHECK: movz
> > +; CHECK: movz w0, #5
> > +  ret i32 5
> > +}
> > +
> > +define i64 @movz_3movk() nounwind {
> > +; CHECK: movz_3movk
> > +; CHECK:      movz x0, #5, lsl #48
> > +; CHECK-NEXT: movk x0, #4660, lsl #32
> > +; CHECK-NEXT: movk x0, #43981, lsl #16
> > +; CHECK-NEXT: movk x0, #22136
> > +  ret i64 1427392313513592
> > +}
> > +
> > +define i64 @movz_movk_skip1() nounwind {
> > +; CHECK: movz_movk_skip1
> > +; CHECK:      movz x0, #5, lsl #32
> > +; CHECK-NEXT: movk x0, #17185, lsl #16
> > +  ret i64 22601072640
> > +}
> > +
> > +define i64 @movz_skip1_movk() nounwind {
> > +; CHECK: movz_skip1_movk
> > +; CHECK:      movz x0, #34388, lsl #32
> > +; CHECK-NEXT: movk x0, #4660
> > +  ret i64 147695335379508
> > +}
> > +
> > +;==--------------------------------------------------------------------------==
> > +; Tests for MOVN with MOVK.
> > +;==--------------------------------------------------------------------------==
> > +
> > +define i64 @movn() nounwind {
> > +; CHECK: movn
> > +; CHECK: movn x0, #41
> > +  ret i64 -42
> > +}
> > +
> > +define i64 @movn_skip1_movk() nounwind {
> > +; CHECK: movn_skip1_movk
> > +; CHECK:      movn x0, #41, lsl #32
> > +; CHECK-NEXT: movk x0, #4660
> > +  ret i64 -176093720012
> > +}
> > +
> > +;==--------------------------------------------------------------------------==
> > +; Tests for ORR with MOVK.
> > +;==--------------------------------------------------------------------------==
> > +; rdar://14987673
> > +
> > +define i64 @orr_movk1() nounwind {
> > +; CHECK: orr_movk1
> > +; CHECK: orr x0, xzr, #0xffff0000ffff0
> > +; CHECK: movk x0, #57005, lsl #16
> > +  ret i64 72056498262245120
> > +}
> > +
> > +define i64 @orr_movk2() nounwind {
> > +; CHECK: orr_movk2
> > +; CHECK: orr x0, xzr, #0xffff0000ffff0
> > +; CHECK: movk x0, #57005, lsl #48
> > +  ret i64 -2400982650836746496
> > +}
> > +
> > +define i64 @orr_movk3() nounwind {
> > +; CHECK: orr_movk3
> > +; CHECK: orr x0, xzr, #0xffff0000ffff0
> > +; CHECK: movk x0, #57005, lsl #32
> > +  ret i64 72020953688702720
> > +}
> > +
> > +define i64 @orr_movk4() nounwind {
> > +; CHECK: orr_movk4
> > +; CHECK: orr x0, xzr, #0xffff0000ffff0
> > +; CHECK: movk x0, #57005
> > +  ret i64 72056494543068845
> > +}
> > +
> > +; rdar://14987618
> > +define i64 @orr_movk5() nounwind {
> > +; CHECK: orr_movk5
> > +; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
> > +; CHECK: movk x0, #57005, lsl #16
> > +  ret i64 -71777214836900096
> > +}
> > +
> > +define i64 @orr_movk6() nounwind {
> > +; CHECK: orr_movk6
> > +; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
> > +; CHECK: movk x0, #57005, lsl #16
> > +; CHECK: movk x0, #57005, lsl #48
> > +  ret i64 -2400982647117578496
> > +}
> > +
> > +define i64 @orr_movk7() nounwind {
> > +; CHECK: orr_movk7
> > +; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
> > +; CHECK: movk x0, #57005, lsl #48
> > +  ret i64 -2400982646575268096
> > +}
> > +
> > +define i64 @orr_movk8() nounwind {
> > +; CHECK: orr_movk8
> > +; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
> > +; CHECK: movk x0, #57005
> > +; CHECK: movk x0, #57005, lsl #48
> > +  ret i64 -2400982646575276371
> > +}
> > +
> > +; rdar://14987715
> > +define i64 @orr_movk9() nounwind {
> > +; CHECK: orr_movk9
> > +; CHECK: orr x0, xzr, #0xffffff000000000
> > +; CHECK: movk x0, #65280
> > +; CHECK: movk x0, #57005, lsl #16
> > +  ret i64 1152921439623315200
> > +}
> > +
> > +define i64 @orr_movk10() nounwind {
> > +; CHECK: orr_movk10
> > +; CHECK: orr x0, xzr, #0xfffffffffffff00
> > +; CHECK: movk x0, #57005, lsl #16
> > +  ret i64 1152921504047824640
> > +}
> > +
> > +define i64 @orr_movk11() nounwind {
> > +; CHECK: orr_movk11
> > +; CHECK: orr x0, xzr, #0xfff00000000000ff
> > +; CHECK: movk x0, #57005, lsl #16
> > +; CHECK: movk x0, #65535, lsl #32
> > +  ret i64 -4222125209747201
> > +}
> > +
> > +define i64 @orr_movk12() nounwind {
> > +; CHECK: orr_movk12
> > +; CHECK: orr x0, xzr, #0xfff00000000000ff
> > +; CHECK: movk x0, #57005, lsl #32
> > +  ret i64 -4258765016661761
> > +}
> > +
> > +define i64 @orr_movk13() nounwind {
> > +; CHECK: orr_movk13
> > +; CHECK: orr x0, xzr, #0xfffff000000
> > +; CHECK: movk x0, #57005
> > +; CHECK: movk x0, #57005, lsl #48
> > +  ret i64 -2401245434149282131
> > +}
> > +
> > +; rdar://13944082
> > +define i64 @g() nounwind {
> > +; CHECK: g
> > +; CHECK: movz x0, #65535, lsl #48
> > +; CHECK: movk x0, #2
> > +entry:
> > +  ret i64 -281474976710654
> > +}
> >
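The decimal constants in this file are easier to audit once they are written out as 16-bit chunks or repeated bit patterns; a quick Python check of a few of them (plain arithmetic, nothing target-specific):

  # ORR-immediate cases: the return values really are repeating bit patterns
  assert 30064771079          == 0x0000000700000007    # @test64_32_rot0
  assert 13835058071388291075 == 0xC0000003C0000003    # @test64_32_rot2
  assert 17216961135462248174 == 0xEEEEEEEEEEEEEEEE    # @test64_4_rot3
  assert 2863311530           == 0xAAAAAAAA            # @test32_2_rot1

  # MOVZ/MOVK cases: the value is rebuilt 16 bits at a time
  assert (5 << 48) | (4660 << 32) | (43981 << 16) | 22136 == 1427392313513592  # @movz_3movk
  assert (5 << 32) | (17185 << 16) == 22601072640                              # @movz_movk_skip1
  assert (34388 << 32) | 4660 == 147695335379508                               # @movz_skip1_movk

  # MOVN materializes the bitwise NOT of the (shifted) immediate
  assert ~41 == -42                       # @movn
  v = ~(41 << 32) & 0xFFFFFFFFFFFFFFFF    # movn x0, #41, lsl #32
  v = (v & ~0xFFFF) | 4660                # movk x0, #4660
  assert v - (1 << 64) == -176093720012   # @movn_skip1_movk, read back as a signed i64
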
> > Added: llvm/trunk/test/CodeGen/ARM64/mul.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/mul.ll?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/mul.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/mul.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,90 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +; rdar://9296808
> > +; rdar://9349137
> > +
> > +define i128 @t1(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> > +; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> > +  %tmp1 = zext i64 %a to i128
> > +  %tmp2 = zext i64 %b to i128
> > +  %tmp3 = mul i128 %tmp1, %tmp2
> > +  ret i128 %tmp3
> > +}
> > +
> > +define i128 @t2(i64 %a, i64 %b) nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> > +; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> > +  %tmp1 = sext i64 %a to i128
> > +  %tmp2 = sext i64 %b to i128
> > +  %tmp3 = mul i128 %tmp1, %tmp2
> > +  ret i128 %tmp3
> > +}
> > +
> > +define i64 @t3(i32 %a, i32 %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> > +  %tmp1 = zext i32 %a to i64
> > +  %tmp2 = zext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> > +
> > +define i64 @t4(i32 %a, i32 %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> > +  %tmp1 = sext i32 %a to i64
> > +  %tmp2 = sext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  ret i64 %tmp3
> > +}
> > +
> > +define i64 @t5(i32 %a, i32 %b, i64 %c) nounwind {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
> > +  %tmp1 = zext i32 %a to i64
> > +  %tmp2 = zext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  %tmp4 = add i64 %c, %tmp3
> > +  ret i64 %tmp4
> > +}
> > +
> > +define i64 @t6(i32 %a, i32 %b, i64 %c) nounwind {
> > +entry:
> > +; CHECK-LABEL: t6:
> > +; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
> > +  %tmp1 = sext i32 %a to i64
> > +  %tmp2 = sext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  %tmp4 = sub i64 %c, %tmp3
> > +  ret i64 %tmp4
> > +}
> > +
> > +define i64 @t7(i32 %a, i32 %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t7:
> > +; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> > +  %tmp1 = zext i32 %a to i64
> > +  %tmp2 = zext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  %tmp4 = sub i64 0, %tmp3
> > +  ret i64 %tmp4
> > +}
> > +
> > +define i64 @t8(i32 %a, i32 %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t8:
> > +; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> > +  %tmp1 = sext i32 %a to i64
> > +  %tmp2 = sext i32 %b to i64
> > +  %tmp3 = mul i64 %tmp1, %tmp2
> > +  %tmp4 = sub i64 0, %tmp3
> > +  ret i64 %tmp4
> > +}
> >
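For the i128 cases at the top of this file, what the CHECK lines expect is simply "low half from mul, high half from umulh/smulh". A quick Python check of that split (just the arithmetic the tests rely on, with made-up example operands):

  a, b = 0xDEADBEEFCAFEBABE, 0x0123456789ABCDEF
  full = a * b                    # the i128 product of the zero-extended inputs
  lo = full & ((1 << 64) - 1)     # what mul produces
  hi = full >> 64                 # what umulh produces
  assert (hi << 64) | lo == full

  # the 32x32 -> 64-bit widening forms (@t3..@t8) can never overflow 64 bits,
  # so a single umull/smull (plus an optional add/sub/neg) is enough
  assert 0xFFFFFFFF * 0xFFFFFFFF < (1 << 64)
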
> > Added: llvm/trunk/test/CodeGen/ARM64/neon-compare-instructions.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/neon-compare-instructions.ll?rev=205090&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/neon-compare-instructions.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/neon-compare-instructions.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,1191 @@
> > +; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s
> > +
> > +define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp eq <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp eq <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +       %tmp3 = icmp eq <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +       %tmp3 = icmp eq <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +       %tmp3 = icmp eq <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +       %tmp3 = icmp eq <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +       %tmp3 = icmp eq <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp sgt <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp sgt <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +       %tmp3 = icmp sgt <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +       %tmp3 = icmp sgt <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +       %tmp3 = icmp sgt <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +       %tmp3 = icmp sgt <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +       %tmp3 = icmp sgt <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
> > +       %tmp3 = icmp slt <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
> > +       %tmp3 = icmp slt <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
> > +       %tmp3 = icmp slt <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
> > +       %tmp3 = icmp slt <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
> > +       %tmp3 = icmp slt <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
> > +       %tmp3 = icmp slt <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LT implemented as GT, so check reversed operands.
> > +;CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
> > +       %tmp3 = icmp slt <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp sge <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp sge <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +       %tmp3 = icmp sge <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +       %tmp3 = icmp sge <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +       %tmp3 = icmp sge <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +       %tmp3 = icmp sge <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +       %tmp3 = icmp sge <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
> > +       %tmp3 = icmp sle <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
> > +       %tmp3 = icmp sle <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
> > +       %tmp3 = icmp sle <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
> > +       %tmp3 = icmp sle <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
> > +       %tmp3 = icmp sle <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
> > +       %tmp3 = icmp sle <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LE implemented as GE, so check reversed operands.
> > +;CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
> > +       %tmp3 = icmp sle <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ugt <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ugt <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +       %tmp3 = icmp ugt <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +       %tmp3 = icmp ugt <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +       %tmp3 = icmp ugt <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +       %tmp3 = icmp ugt <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +       %tmp3 = icmp ugt <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
> > +       %tmp3 = icmp ult <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
> > +       %tmp3 = icmp ult <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
> > +       %tmp3 = icmp ult <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
> > +       %tmp3 = icmp ult <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
> > +       %tmp3 = icmp ult <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
> > +       %tmp3 = icmp ult <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
> > +       %tmp3 = icmp ult <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp uge <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp uge <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> > +       %tmp3 = icmp uge <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> > +       %tmp3 = icmp uge <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> > +       %tmp3 = icmp uge <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> > +       %tmp3 = icmp uge <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +;CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> > +       %tmp3 = icmp uge <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
> > +       %tmp3 = icmp ule <8 x i8> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
> > +       %tmp3 = icmp ule <16 x i8> %A, %B;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
> > +       %tmp3 = icmp ule <4 x i16> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
> > +       %tmp3 = icmp ule <8 x i16> %A, %B;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
> > +       %tmp3 = icmp ule <2 x i32> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
> > +       %tmp3 = icmp ule <4 x i32> %A, %B;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
> > +       %tmp3 = icmp ule <2 x i64> %A, %B;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +
> > +define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +       %tmp3 = icmp eq <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +       %tmp3 = icmp eq <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +       %tmp3 = icmp eq <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +       %tmp3 = icmp eq <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +       %tmp3 = icmp eq <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +       %tmp3 = icmp eq <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +       %tmp3 = icmp eq <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +
> > +define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +       %tmp3 = icmp sge <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +       %tmp3 = icmp sge <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +       %tmp3 = icmp sge <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +       %tmp3 = icmp sge <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +       %tmp3 = icmp sge <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +       %tmp3 = icmp sge <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
> > +;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +       %tmp3 = icmp sge <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +
> > +define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +       %tmp3 = icmp sgt <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +       %tmp3 = icmp sgt <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +       %tmp3 = icmp sgt <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +       %tmp3 = icmp sgt <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +       %tmp3 = icmp sgt <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +       %tmp3 = icmp sgt <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
> > +;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +       %tmp3 = icmp sgt <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +       %tmp3 = icmp sle <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +       %tmp3 = icmp sle <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +       %tmp3 = icmp sle <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +       %tmp3 = icmp sle <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +       %tmp3 = icmp sle <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +       %tmp3 = icmp sle <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
> > +;CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +       %tmp3 = icmp sle <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +       %tmp3 = icmp slt <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +       %tmp3 = icmp slt <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +       %tmp3 = icmp slt <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +       %tmp3 = icmp slt <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +       %tmp3 = icmp slt <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +       %tmp3 = icmp slt <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
> > +;CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +       %tmp3 = icmp slt <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
> > +;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
> > +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> > +       %tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
> > +       %tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
> > +       %tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
> > +       %tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
> > +       %tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
> > +       %tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
> > +       %tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
> > +       %tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +
> > +define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
> > +       %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
> > +       %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
> > +       %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
> > +       %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
> > +       %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
> > +       %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
> > +       %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v[[ZERO]].8b, v0.8b
> > +       %tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
> > +       %tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
> > +       %tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
> > +       %tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
> > +       %tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
> > +       %tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LS implemented as HS, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
> > +       %tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v[[ZERO]].8b, {{v[0-9]+}}.8b
> > +       %tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
> > +       ret <8 x i8> %tmp4
> > +}
> > +
> > +define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
> > +       %tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
> > +   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
> > +       ret <16 x i8> %tmp4
> > +}
> > +
> > +define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
> > +       %tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
> > +       ret <4 x i16> %tmp4
> > +}
> > +
> > +define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
> > +       %tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
> > +   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
> > +       ret <8 x i16> %tmp4
> > +}
> > +
> > +define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi d[[ZERO:[0-9]+]], #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
> > +       %tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
> > +       ret <2 x i32> %tmp4
> > +}
> > +
> > +define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
> > +       %tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
> > +   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
> > +       ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
> > +; Using registers other than v0, v1 are possible, but would be odd.
> > +; LO implemented as HI, so check reversed operands.
> > +;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
> > +;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
> > +       %tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
> > +   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
> > +       ret <2 x i64> %tmp4
> > +}
> > +
> > +define <1 x i64> @cmeqz_v1i64(<1 x i64> %A) {
> > +; CHECK-LABEL: cmeqz_v1i64:
> > +; CHECK: cmeq d0, d0, #0
> > +  %tst = icmp eq <1 x i64> %A, <i64 0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @cmgez_v1i64(<1 x i64> %A) {
> > +; CHECK-LABEL: cmgez_v1i64:
> > +; CHECK: cmge d0, d0, #0
> > +  %tst = icmp sge <1 x i64> %A, <i64 0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @cmgtz_v1i64(<1 x i64> %A) {
> > +; CHECK-LABEL: cmgtz_v1i64:
> > +; CHECK: cmgt d0, d0, #0
> > +  %tst = icmp sgt <1 x i64> %A, <i64 0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @cmlez_v1i64(<1 x i64> %A) {
> > +; CHECK-LABEL: cmlez_v1i64:
> > +; CHECK: cmle d0, d0, #0
> > +  %tst = icmp sle <1 x i64> %A, <i64 0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @cmltz_v1i64(<1 x i64> %A) {
> > +; CHECK-LABEL: cmltz_v1i64:
> > +; CHECK: cmlt d0, d0, #0
> > +  %tst = icmp slt <1 x i64> %A, <i64 0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmeqz_v1f64(<1 x double> %A) {
> > +; CHECK-LABEL: fcmeqz_v1f64:
> > +; CHECK: fcmeq d0, d0, #0
> > +  %tst = fcmp oeq <1 x double> %A, <double 0.0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmgez_v1f64(<1 x double> %A) {
> > +; CHECK-LABEL: fcmgez_v1f64:
> > +; CHECK: fcmge d0, d0, #0
> > +  %tst = fcmp oge <1 x double> %A, <double 0.0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmgtz_v1f64(<1 x double> %A) {
> > +; CHECK-LABEL: fcmgtz_v1f64:
> > +; CHECK: fcmgt d0, d0, #0
> > +  %tst = fcmp ogt <1 x double> %A, <double 0.0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmlez_v1f64(<1 x double> %A) {
> > +; CHECK-LABEL: fcmlez_v1f64:
> > +; CHECK: fcmle d0, d0, #0
> > +  %tst = fcmp ole <1 x double> %A, <double 0.0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmltz_v1f64(<1 x double> %A) {
> > +; CHECK-LABEL: fcmltz_v1f64:
> > +; CHECK: fcmlt d0, d0, #0
> > +  %tst = fcmp olt <1 x double> %A, <double 0.0>
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/patchpoint.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/patchpoint.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/patchpoint.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/patchpoint.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,163 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=0 |
> FileCheck %s
> > +
> > +; Trivial patchpoint codegen
> > +;
> > +define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64
> %p4) {
> > +entry:
> > +; CHECK-LABEL: trivial_patchpoint_codegen:
> > +; CHECK:       movz x16, #57005, lsl #32
> > +; CHECK-NEXT:  movk x16, #48879, lsl #16
> > +; CHECK-NEXT:  movk x16, #51966
> > +; CHECK-NEXT:  blr  x16
> > +; CHECK:       movz x16, #57005, lsl #32
> > +; CHECK-NEXT:  movk x16, #48879, lsl #16
> > +; CHECK-NEXT:  movk x16, #51967
> > +; CHECK-NEXT:  blr  x16
> > +; CHECK:       ret
> > +  %resolveCall2 = inttoptr i64 244837814094590 to i8*
> > +  %result = tail call i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4,
> i64 %p1, i64 %p2, i64 %p3, i64 %p4)
> > +  %resolveCall3 = inttoptr i64 244837814094591 to i8*
> > +  tail call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2,
> i64 %p1, i64 %result)
> > +  ret i64 %result
> > +}
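
In case it saves anyone a calculation: the magic values in those CHECK lines
are just the three halfwords of the call target. 244837814094590 is
0xDEADBEEFCAFE = (0xDEAD << 32) | (0xBEEF << 16) | 0xCAFE, i.e. movz #57005
lsl #32, movk #48879 lsl #16, movk #51966; the second target,
244837814094591, differs only in the low halfword (#51967).
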
> > +
> > +; Caller frame metadata with stackmaps. This should not be optimized
> > +; as a leaf function.
> > +;
> > +; CHECK-LABEL: caller_meta_leaf
> > +; CHECK:       mov fp, sp
> > +; CHECK-NEXT:  sub sp, sp, #32
> > +; CHECK:       Ltmp
> > +; CHECK:       mov sp, fp
> > +; CHECK:       ret
> > +
> > +define void @caller_meta_leaf() {
> > +entry:
> > +  %metadata = alloca i64, i32 3, align 8
> > +  store i64 11, i64* %metadata
> > +  store i64 12, i64* %metadata
> > +  store i64 13, i64* %metadata
> > +  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0,
> i64* %metadata)
> > +  ret void
> > +}
> > +
> > +; Test the webkit_jscc calling convention.
> > +; One argument will be passed in a register, the other will be pushed on
> the stack.
> > +; Return value in x0.
> > +define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64
> %p4) {
> > +entry:
> > +; CHECK-LABEL: jscall_patchpoint_codegen:
> > +; CHECK:      Ltmp
> > +; CHECK:      str x{{.+}}, [sp]
> > +; CHECK-NEXT: mov  x0, x{{.+}}
> > +; CHECK:      Ltmp
> > +; CHECK-NEXT: movz  x16, #65535, lsl #32
> > +; CHECK-NEXT: movk  x16, #57005, lsl #16
> > +; CHECK-NEXT: movk  x16, #48879
> > +; CHECK-NEXT: blr x16
> > +  %resolveCall2 = inttoptr i64 281474417671919 to i8*
> > +  %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2,
> i64 %p4, i64 %p2)
> > +  %resolveCall3 = inttoptr i64 244837814038255 to i8*
> > +  tail call webkit_jscc void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2,
> i64 %p4, i64 %result)
> > +  ret void
> > +}
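
A stripped-down illustration of the convention itself, independent of
patchpoints; this is only a sketch of how I read the comment above, with a
made-up callee, not code from the patch:

  declare webkit_jscc i64 @js_callee(i64, i64)

  define i64 @jscc_sketch(i64 %a, i64 %b) {
  ; per the comment above, %a should end up in x0 and %b in the first
  ; stack slot at [sp]
    %r = call webkit_jscc i64 @js_callee(i64 %a, i64 %b)
    ret i64 %r
  }
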
> > +
> > +; Test that the arguments are properly aligned and that we don't store
> undef arguments.
> > +define i64 @jscall_patchpoint_codegen2(i64 %callee) {
> > +entry:
> > +; CHECK-LABEL: jscall_patchpoint_codegen2:
> > +; CHECK:      Ltmp
> > +; CHECK:      orr x{{.+}}, xzr, #0x6
> > +; CHECK-NEXT: str x{{.+}}, [sp, #24]
> > +; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
> > +; CHECK-NEXT: str w{{.+}}, [sp, #16]
> > +; CHECK-NEXT: orr x{{.+}}, xzr, #0x2
> > +; CHECK-NEXT: str x{{.+}}, [sp]
> > +; CHECK:      Ltmp
> > +; CHECK-NEXT: movz  x16, #65535, lsl #32
> > +; CHECK-NEXT: movk  x16, #57005, lsl #16
> > +; CHECK-NEXT: movk  x16, #48879
> > +; CHECK-NEXT: blr x16
> > +  %call = inttoptr i64 281474417671919 to i8*
> > +  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64
> %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
> > +  ret i64 %result
> > +}
> > +
> > +; Test that the arguments are properly aligned and that we don't store
> undef arguments.
> > +define i64 @jscall_patchpoint_codegen3(i64 %callee) {
> > +entry:
> > +; CHECK-LABEL: jscall_patchpoint_codegen3:
> > +; CHECK:      Ltmp
> > +; CHECK:      movz  x{{.+}}, #10
> > +; CHECK-NEXT: str x{{.+}}, [sp, #48]
> > +; CHECK-NEXT: orr w{{.+}}, wzr, #0x8
> > +; CHECK-NEXT: str w{{.+}}, [sp, #36]
> > +; CHECK-NEXT: orr x{{.+}}, xzr, #0x6
> > +; CHECK-NEXT: str x{{.+}}, [sp, #24]
> > +; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
> > +; CHECK-NEXT: str w{{.+}}, [sp, #16]
> > +; CHECK-NEXT: orr x{{.+}}, xzr, #0x2
> > +; CHECK-NEXT: str x{{.+}}, [sp]
> > +; CHECK:      Ltmp
> > +; CHECK-NEXT: movz  x16, #65535, lsl #32
> > +; CHECK-NEXT: movk  x16, #57005, lsl #16
> > +; CHECK-NEXT: movk  x16, #48879
> > +; CHECK-NEXT: blr x16
> > +  %call = inttoptr i64 281474417671919 to i8*
> > +  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64
> %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32
> undef, i64 10)
> > +  ret i64 %result
> > +}
> > +
> > +; Test patchpoints reusing the same TargetConstant.
> > +; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs
> + 4)
> > +; There is no way to verify this, since it depends on memory allocation.
> > +; But I think it's useful to include as a working example.
> > +define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64*
> %tmp33, i64 %tmp79) {
> > +entry:
> > +  %tmp80 = add i64 %tmp79, -16
> > +  %tmp81 = inttoptr i64 %tmp80 to i64*
> > +  %tmp82 = load i64* %tmp81, align 8
> > +  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14,
> i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
> > +  tail call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64
> %arg, i64 %tmp10, i64 %tmp82)
> > +  %tmp83 = load i64* %tmp33, align 8
> > +  %tmp84 = add i64 %tmp83, -24
> > +  %tmp85 = inttoptr i64 %tmp84 to i64*
> > +  %tmp86 = load i64* %tmp85, align 8
> > +  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17,
> i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
> > +  tail call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64
> %arg, i64 %tmp10, i64 %tmp86)
> > +  ret i64 10
> > +}
> > +
> > +; Test small patchpoints that don't emit calls.
> > +define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64
> %p4) {
> > +entry:
> > +; CHECK-LABEL: small_patchpoint_codegen:
> > +; CHECK:      Ltmp
> > +; CHECK:      nop
> > +; CHECK-NEXT: nop
> > +; CHECK-NEXT: nop
> > +; CHECK-NEXT: nop
> > +; CHECK-NEXT: nop
> > +; CHECK-NEXT: ldp
> > +; CHECK-NEXT: ret
> > +  %result = tail call i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1,
> i64 %p2)
> > +  ret void
> > +}
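
As far as I can tell, the five nops are just the requested patch region: the
call passes a null target and asks for 20 bytes, so no call is emitted and
20 / 4 = 5 nop instructions fill the space, which is what the checks verify.
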
> > +
> > +; Test that scratch registers are spilled around patchpoints
> > +; CHECK: InlineAsm End
> > +; CHECK-NEXT: mov x{{[0-9]+}}, x16
> > +; CHECK-NEXT: mov x{{[0-9]+}}, x17
> > +; CHECK-NEXT: Ltmp
> > +; CHECK-NEXT: nop
> > +define void @clobberScratch(i32* %p) {
> > +  %v = load i32* %p
> > +  tail call void asm sideeffect "nop",
> "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
> nounwind
> > +  tail call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p,
> i32 %v)
> > +  store i32 %v, i32* %p
> > +  ret void
> > +}
> > +
> > +declare void @llvm.experimental.stackmap(i64, i32, ...)
> > +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
> > +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
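
For anyone reading these tests cold, the operand layout used throughout the
file is, as I read the stackmap/patchpoint docs: id, byte size of the patch
region, call target, number of call arguments, the call arguments, and then
any further operands, which are only recorded as live values in the stack
map. Annotated, the first call in this file breaks down as:

  i64 2              ; stackmap/patchpoint id
  i32 20             ; bytes reserved for patching at this site
  i8* %resolveCall2  ; call target
  i32 4              ; number of call arguments
  i64 %p1 ... %p4    ; the call arguments; anything after these would only
                     ; appear in the emitted stack map
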
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/platform-reg.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/platform-reg.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/platform-reg.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/platform-reg.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,26 @@
> > +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
> --check-prefix=CHECK-DARWIN
> > +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
> > +
> > +; x18 is reserved as a platform register on Darwin but not on other
> > +; systems. Create loads of register pressure and make sure this is
> respected.
> > +
> > +; Also, fp must always refer to a valid frame record, even if it's not
> the one
> > +; of the current function, so it shouldn't be used either.
> > +
> > + at var = global [30 x i64] zeroinitializer
> > +
> > +define void @keep_live() {
> > +  %val = load volatile [30 x i64]* @var
> > +  store volatile [30 x i64] %val, [30 x i64]* @var
> > +
> > +; CHECK: ldr x18
> > +; CHECK: str x18
> > +
> > +; CHECK-DARWIN-NOT: ldr fp
> > +; CHECK-DARWIN-NOT: ldr x18
> > +; CHECK-DARWIN: Spill
> > +; CHECK-DARWIN-NOT: ldr fp
> > +; CHECK-DARWIN-NOT: ldr x18
> > +; CHECK-DARWIN: ret
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/popcnt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/popcnt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/popcnt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/popcnt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,43 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
> > +  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
> > +  ret i32 %cnt
> > +; CHECK: fmov  s0, w0
> > +; CHECK: cnt.8b        v0, v0
> > +; CHECK: uaddlv.8b     h0, v0
> > +; CHECK: fmov w0, s0
> > +; CHECK: ret
> > +}
> > +
> > +define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
> > +  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
> > +  ret i64 %cnt
> > +; CHECK: fmov  d0, x0
> > +; CHECK: cnt.8b        v0, v0
> > +; CHECK: uaddlv.8b     h0, v0
> > +; CHECK: fmov  w0, s0
> > +; CHECK: ret
> > +}
> > +
> > +; Do not use AdvSIMD when -mno-implicit-float is specified.
> > +; rdar://9473858
> > +
> > +define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
> > +  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
> > +  ret i32 %cnt
> > +; CHECK-LABEL: cnt32:
> > +; CHECK-NOT: 16b
> > +; CHECK: ret
> > +}
> > +
> > +define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
> > +  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
> > +  ret i64 %cnt
> > +; CHECK-LABEL: cnt64:
> > +; CHECK-NOT: 16b
> > +; CHECK: ret
> > +}
> > +
> > +declare i32 @llvm.ctpop.i32(i32) nounwind readnone
> > +declare i64 @llvm.ctpop.i64(i64) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/prefetch.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/prefetch.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/prefetch.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/prefetch.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,88 @@
> > +; RUN: llc %s -march arm64 -o - | FileCheck %s
> > +
> > + at a = common global i32* null, align 8
> > +
> > +define void @test(i32 %i, i32 %j) nounwind ssp {
> > +entry:
> > +  ; CHECK: @test
> > +  %j.addr = alloca i32, align 4
> > +  store i32 %j, i32* %j.addr, align 4, !tbaa !0
> > +  %tmp = bitcast i32* %j.addr to i8*
> > +  ; CHECK: prfum pldl1strm
> > +  call void @llvm.prefetch(i8* %tmp, i32 0, i32 0, i32 1)
> > +  ; CHECK: prfum pldl3keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 0, i32 1, i32 1)
> > +  ; CHECK: prfum pldl2keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 0, i32 2, i32 1)
> > +  ; CHECK: prfum pldl1keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 0, i32 3, i32 1)
> > +
> > +  ; CHECK: prfum pstl1strm
> > +  call void @llvm.prefetch(i8* %tmp, i32 1, i32 0, i32 1)
> > +  ; CHECK: prfum pstl3keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 1, i32 1, i32 1)
> > +  ; CHECK: prfum pstl2keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 1, i32 2, i32 1)
> > +  ; CHECK: prfum pstl1keep
> > +  call void @llvm.prefetch(i8* %tmp, i32 1, i32 3, i32 1)
> > +
> > +  %tmp1 = load i32* %j.addr, align 4, !tbaa !0
> > +  %add = add nsw i32 %tmp1, %i
> > +  %idxprom = sext i32 %add to i64
> > +  %tmp2 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx = getelementptr inbounds i32* %tmp2, i64 %idxprom
> > +  %tmp3 = bitcast i32* %arrayidx to i8*
> > +
> > +  ; CHECK: prfm pldl1strm
> > +  call void @llvm.prefetch(i8* %tmp3, i32 0, i32 0, i32 1)
> > +  %tmp4 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx3 = getelementptr inbounds i32* %tmp4, i64 %idxprom
> > +  %tmp5 = bitcast i32* %arrayidx3 to i8*
> > +
> > +  ; CHECK: prfm pldl3keep
> > +  call void @llvm.prefetch(i8* %tmp5, i32 0, i32 1, i32 1)
> > +  %tmp6 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx6 = getelementptr inbounds i32* %tmp6, i64 %idxprom
> > +  %tmp7 = bitcast i32* %arrayidx6 to i8*
> > +
> > +  ; CHECK: prfm pldl2keep
> > +  call void @llvm.prefetch(i8* %tmp7, i32 0, i32 2, i32 1)
> > +  %tmp8 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx9 = getelementptr inbounds i32* %tmp8, i64 %idxprom
> > +  %tmp9 = bitcast i32* %arrayidx9 to i8*
> > +
> > +  ; CHECK: prfm pldl1keep
> > +  call void @llvm.prefetch(i8* %tmp9, i32 0, i32 3, i32 1)
> > +  %tmp10 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx12 = getelementptr inbounds i32* %tmp10, i64 %idxprom
> > +  %tmp11 = bitcast i32* %arrayidx12 to i8*
> > +
> > +  ; CHECK: prfm pstl1strm
> > +  call void @llvm.prefetch(i8* %tmp11, i32 1, i32 0, i32 1)
> > +  %tmp12 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx15 = getelementptr inbounds i32* %tmp12, i64 %idxprom
> > +  %tmp13 = bitcast i32* %arrayidx15 to i8*
> > +
> > +  ; CHECK: prfm pstl3keep
> > +  call void @llvm.prefetch(i8* %tmp13, i32 1, i32 1, i32 1)
> > +  %tmp14 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx18 = getelementptr inbounds i32* %tmp14, i64 %idxprom
> > +  %tmp15 = bitcast i32* %arrayidx18 to i8*
> > +
> > +  ; CHECK: prfm pstl2keep
> > +  call void @llvm.prefetch(i8* %tmp15, i32 1, i32 2, i32 1)
> > +  %tmp16 = load i32** @a, align 8, !tbaa !3
> > +  %arrayidx21 = getelementptr inbounds i32* %tmp16, i64 %idxprom
> > +  %tmp17 = bitcast i32* %arrayidx21 to i8*
> > +
> > +  ; CHECK: prfm pstl1keep
> > +  call void @llvm.prefetch(i8* %tmp17, i32 1, i32 3, i32 1)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind
> > +
> > +!0 = metadata !{metadata !"int", metadata !1}
> > +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> > +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> > +!3 = metadata !{metadata !"any pointer", metadata !1}
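
A self-contained version of one of the calls above, with the operands spelled
out; the locality-to-hint mapping is just what the CHECK lines document:

  define void @prefetch_sketch(i8* %p) {
  ; llvm.prefetch operands: address, rw (0 = read, 1 = write),
  ; locality 0..3, cache type (1 = data).
  ; Per the checks above: locality 3 -> pldl1keep, 2 -> pldl2keep,
  ; 1 -> pldl3keep, 0 -> pldl1strm, and rw = 1 selects the pst* forms.
    call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)
    ret void
  }

  declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind
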
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/promote-const.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/promote-const.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/promote-const.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/promote-const.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,255 @@
> > +; Disable machine cse to stress the different paths of the algorithm.
> > +; Otherwise, we always fall into the simple case, i.e., only one
> definition.
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse
> -arm64-stress-promote-const | FileCheck -check-prefix=PROMOTED %s
> > +; The REGULAR run just checks that the inputs passed to promote const
> expose
> > +; the appropriate patterns.
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse
> -arm64-promote-const=false | FileCheck -check-prefix=REGULAR %s
> > +
> > +%struct.uint8x16x4_t = type { [4 x <16 x i8>] }
> > +
> > +; Constant is a structure
> > +define %struct.uint8x16x4_t @test1() {
> > +; PROMOTED-LABEL: test1:
> > +; Promote constant has created a big constant for the whole structure
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], __PromotedConst at PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]],
> __PromotedConst at PAGEOFF
> > +; Destination registers are defined by the ABI
> > +; PROMOTED-NEXT: ldp q0, q1, {{\[}}[[BASEADDR]]]
> > +; PROMOTED-NEXT: ldp q2, q3, {{\[}}[[BASEADDR]], #32]
> > +; PROMOTED-NEXT: ret
> > +
> > +; REGULAR-LABEL: test1:
> > +; Regular access is quite bad: it performs 4 loads, one for each chunk
> of
> > +; the structure
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
> > +; Destination registers are defined by the ABI
> > +; REGULAR: ldr q0, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
> > +; REGULAR: ldr q1, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
> > +; REGULAR: adrp [[PAGEADDR2:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
> > +; REGULAR: ldr q2, {{\[}}[[PAGEADDR2]], [[CSTLABEL2]]@PAGEOFF]
> > +; REGULAR: adrp [[PAGEADDR3:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
> > +; REGULAR: ldr q3, {{\[}}[[PAGEADDR3]], [[CSTLABEL3]]@PAGEOFF]
> > +; REGULAR-NEXT: ret
> > +entry:
> > +  ret %struct.uint8x16x4_t { [4 x <16 x i8>] [<16 x i8> <i8 -40, i8
> -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8
> -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, <16 x i8> <i8 32, i8 124, i8
> 121, i8 120, i8 8, i8 117, i8 -56, i8 113, i8 -76, i8 110, i8 -53, i8 107,
> i8 7, i8 105, i8 103, i8 102>, <16 x i8> <i8 -24, i8 99, i8 -121, i8 97, i8
> 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8
> 85>, <16 x i8> <i8 -104, i8 83, i8 -20, i8 81, i8 81, i8 80, i8 -59, i8 78,
> i8 73, i8 77, i8 -37, i8 75, i8 122, i8 74, i8 37, i8 73>] }
> > +}
> > +
> > +; Two different uses of the same constant in the same basic block
> > +define <16 x i8> @test2(<16 x i8> %arg) {
> > +entry:
> > +; PROMOTED-LABEL: test2:
> > +; In stress mode, constant vectors are promoted
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]],
> [[CSTV1:__PromotedConst[0-9]+]]@PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
> > +; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
> > +; Destination register is defined by ABI
> > +; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: ret
> > +
> > +; REGULAR-LABEL: test2:
> > +; Regular access is strictly the same as promoted access.
> > +; The difference is that the address (and thus the space in memory) is
> not
> > +; shared between constants
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
> > +; REGULAR: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL]]@PAGEOFF]
> > +; Destination register is defined by ABI
> > +; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; REGULAR-NEXT: mla.16b v0, v0, v[[REGNUM]]
> > +; REGULAR-NEXT: ret
> > +  %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  %add.i9 = add <16 x i8> %add.i, %mul.i
> > +  ret <16 x i8> %add.i9
> > +}
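
As I understand the pass, the PROMOTED pattern above corresponds to
materialising the vector once from a module-level global instead of once per
use from the literal pool. A rough sketch of the promoted form, with invented
names and a different constant, not taken from the patch:

  @promoted.sketch = private unnamed_addr constant <16 x i8>
      <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7,
       i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>

  define <16 x i8> @use_twice(<16 x i8> %a) {
  ; both uses read the same global: one adrp/add to form the address, one
  ; ldr, and the loaded q register is then reused by both arithmetic ops
    %c = load <16 x i8>* @promoted.sketch
    %add = add <16 x i8> %a, %c
    %mul = mul <16 x i8> %add, %c
    %res = add <16 x i8> %add, %mul
    ret <16 x i8> %res
  }
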
> > +
> > +; Two different uses of the same constant in two different basic blocks,
> > +; one dominates the other
> > +define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
> > +; PROMOTED-LABEL: test3:
> > +; In stress mode, constant vectors are promoted
> > +; Since the constant is the same as in the previous function,
> > +; the same address must be used
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
> > +; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
> > +; Destination register is defined by ABI
> > +; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
> > +; Next BB
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]],
> [[CSTV2:__PromotedConst[0-9]+]]@PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV2]]@PAGEOFF
> > +; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[BASEADDR]]]
> > +; Next BB
> > +; PROMOTED-NEXT: [[LABEL]]:
> > +; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]]
> > +; PROMOTED-NEXT: ret
> > +
> > +; REGULAR-LABEL: test3:
> > +; Regular mode does not eliminate common subexpressions on its own.
> > +; In other words, the same loads appears several times.
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL1]]@PAGEOFF]
> > +; Destination register is defined by ABI
> > +; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]]
> > +; Next BB
> > +; Redundant load
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL1]]@PAGEOFF]
> > +; REGULAR-NEXT: b [[LABELend:LBB.*]]
> > +; Next BB
> > +; REGULAR-NEXT: [[LABELelse]]
> > +; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL2]]@PAGEOFF]
> > +; Next BB
> > +; REGULAR-NEXT: [[LABELend]]:
> > +; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
> > +; REGULAR-NEXT: add.16b v0, v0, [[DESTV]]
> > +; REGULAR-NEXT: ret
> > +entry:
> > +  %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  %tobool = icmp eq i32 %path, 0
> > +  br i1 %tobool, label %if.else, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %mul.i13 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  br label %if.end
> > +
> > +if.else:                                          ; preds = %entry
> > +  %mul.i = mul <16 x i8> %add.i, <i8 -24, i8 99, i8 -121, i8 97, i8 66,
> i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %if.else,
> %if.then
> > +  %ret2.0 = phi <16 x i8> [ %mul.i13, %if.then ], [ %mul.i, %if.else ]
> > +  %add.i12 = add <16 x i8> %add.i, %ret2.0
> > +  ret <16 x i8> %add.i12
> > +}
> > +
> > +; Two different uses of the same constant in two different basic blocks,
> > +; none dominates the other
> > +define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
> > +; PROMOTED-LABEL: test4:
> > +; In stress mode, constant vectors are promoted
> > +; Since the constant is the same as in the previous function,
> > +; the same address must be used
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
> > +; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
> > +; Destination register is defined by ABI
> > +; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
> > +; Next BB
> > +; PROMOTED: mul.16b v0, v0, v[[REGNUM]]
> > +; Next BB
> > +; PROMOTED-NEXT: [[LABEL]]:
> > +; PROMOTED-NEXT: ret
> > +
> > +
> > +; REGULAR-LABEL: test4:
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL3]]@PAGEOFF]
> > +; Destination register is defined by ABI
> > +; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
> > +; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]]
> > +; Next BB
> > +; Redundant expression
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL3]]@PAGEOFF]
> > +; Destination register is defined by ABI
> > +; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]]
> > +; Next BB
> > +; REGULAR-NEXT: [[LABEL]]:
> > +; REGULAR-NEXT: ret
> > +entry:
> > +  %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  %tobool = icmp eq i32 %path, 0
> > +  br i1 %tobool, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %entry,
> %if.then
> > +  %ret.0 = phi <16 x i8> [ %mul.i, %if.then ], [ %add.i, %entry ]
> > +  ret <16 x i8> %ret.0
> > +}
> > +
> > +; Two different uses of the same constant in two different basic blocks,
> > +; one is in a phi.
> > +define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
> > +; PROMOTED-LABEL: test5:
> > +; In stress mode, constant vectors are promoted
> > +; Since the constant is the same as in the previous function,
> > +; the same address must be used
> > +; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
> > +; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
> > +; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
> > +; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
> > +; Next BB
> > +; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
> > +; PROMOTED-NEXT: mul.16b v[[REGNUM]], [[DESTV]], v[[REGNUM]]
> > +; Next BB
> > +; PROMOTED-NEXT: [[LABEL]]:
> > +; PROMOTED-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[REGNUM]], v[[REGNUM]]
> > +; PROMOTED-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
> > +; PROMOTED-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
> > +; PROMOTED-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
> > +; PROMOTED-NEXT: ret
> > +
> > +; REGULAR-LABEL: test5:
> > +; REGULAR: cbz w0, [[LABELelse:LBB.*]]
> > +; Next BB
> > +; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
> > +; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL]]@PAGEOFF]
> > +; REGULAR-NEXT: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
> > +; REGULAR-NEXT: mul.16b v[[DESTREGNUM:[0-9]+]], [[DESTV]], v[[REGNUM]]
> > +; REGULAR-NEXT: b [[LABELend:LBB.*]]
> > +; Next BB
> > +; REGULAR-NEXT: [[LABELelse]]
> > +; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
> > +; REGULAR-NEXT: ldr q[[DESTREGNUM]], {{\[}}[[PAGEADDR]],
> [[CSTLABEL]]@PAGEOFF]
> > +; Next BB
> > +; REGULAR-NEXT: [[LABELend]]:
> > +; REGULAR-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[DESTREGNUM]],
> v[[DESTREGNUM]]
> > +; REGULAR-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
> > +; REGULAR-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
> > +; REGULAR-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
> > +; REGULAR-NEXT: ret
> > +entry:
> > +  %tobool = icmp eq i32 %path, 0
> > +  br i1 %tobool, label %if.end, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  %mul.i26 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8
> -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8
> -124, i8 0, i8 -128>
> > +  br label %if.end
> > +
> > +if.end:                                           ; preds = %entry,
> %if.then
> > +  %ret.0 = phi <16 x i8> [ %mul.i26, %if.then ], [ <i8 -40, i8 -93, i8
> -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8
> -120, i8 34, i8 -124, i8 0, i8 -128>, %entry ]
> > +  %mul.i25 = mul <16 x i8> %ret.0, %ret.0
> > +  %mul.i24 = mul <16 x i8> %mul.i25, %mul.i25
> > +  %mul.i23 = mul <16 x i8> %mul.i24, %mul.i24
> > +  %mul.i = mul <16 x i8> %mul.i23, %mul.i23
> > +  ret <16 x i8> %mul.i
> > +}
> > +
> > +define void @accessBig(i64* %storage) {
> > +; PROMOTED-LABEL: accessBig:
> > +; PROMOTED: adrp
> > +; PROMOTED: ret
> > +  %addr = bitcast i64* %storage to <1 x i80>*
> > +  store <1 x i80> <i80 483673642326615442599424>, <1 x i80>* %addr
> > +  ret void
> > +}
> > +
> > +define void @asmStatement() {
> > +; PROMOTED-LABEL: asmStatement:
> > +; PROMOTED-NOT: adrp
> > +; PROMOTED: ret
> > +  call void asm sideeffect "bfxil w0, w0, $0, $1", "i,i"(i32 28, i32 4)
> > +  ret void
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/redzone.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/redzone.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/redzone.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/redzone.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,18 @@
> > +; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s
> > +
> > +define i32 @foo(i32 %a, i32 %b) nounwind ssp {
> > +; CHECK-LABEL: foo:
> > +; CHECK-NOT: sub sp, sp
> > +; CHECK: ret
> > +  %a.addr = alloca i32, align 4
> > +  %b.addr = alloca i32, align 4
> > +  %x = alloca i32, align 4
> > +  store i32 %a, i32* %a.addr, align 4
> > +  store i32 %b, i32* %b.addr, align 4
> > +  %tmp = load i32* %a.addr, align 4
> > +  %tmp1 = load i32* %b.addr, align 4
> > +  %add = add nsw i32 %tmp, %tmp1
> > +  store i32 %add, i32* %x, align 4
> > +  %tmp2 = load i32* %x, align 4
> > +  ret i32 %tmp2
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll Sat Mar
> 29 05:18:08 2014
> > @@ -0,0 +1,12 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +
> > +define i8 @t1(i16* %a, i64 %b) {
> > +; CHECK: t1
> > +; CHECK: lsl [[REG:x[0-9]+]], x1, #1
> > +; CHECK: ldrb w0, [x0, [[REG]]]
> > +; CHECK: ret
> > +  %tmp1 = getelementptr inbounds i16* %a, i64 %b
> > +  %tmp2 = load i16* %tmp1
> > +  %tmp3 = trunc i16 %tmp2 to i8
> > +  ret i8 %tmp3
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/register-pairing.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/register-pairing.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/register-pairing.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/register-pairing.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,53 @@
> > +; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +;
> > +; rdar://14075006
> > +
> > +define void @odd() nounwind {
> > +; CHECK-LABEL: odd:
> > +; CHECK: stp d15, d14, [sp, #-144]!
> > +; CHECK: stp d13, d12, [sp, #16]
> > +; CHECK: stp d11, d10, [sp, #32]
> > +; CHECK: stp d9, d8, [sp, #48]
> > +; CHECK: stp x28, x27, [sp, #64]
> > +; CHECK: stp x26, x25, [sp, #80]
> > +; CHECK: stp x24, x23, [sp, #96]
> > +; CHECK: stp x22, x21, [sp, #112]
> > +; CHECK: stp x20, x19, [sp, #128]
> > +; CHECK: movz x0, #42
> > +; CHECK: ldp x20, x19, [sp, #128]
> > +; CHECK: ldp x22, x21, [sp, #112]
> > +; CHECK: ldp x24, x23, [sp, #96]
> > +; CHECK: ldp x26, x25, [sp, #80]
> > +; CHECK: ldp x28, x27, [sp, #64]
> > +; CHECK: ldp d9, d8, [sp, #48]
> > +; CHECK: ldp d11, d10, [sp, #32]
> > +; CHECK: ldp d13, d12, [sp, #16]
> > +; CHECK: ldp d15, d14, [sp], #144
> > +  call void asm sideeffect "mov x0, #42",
> "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"()
> nounwind
> > +  ret void
> > +}
> > +
> > +define void @even() nounwind {
> > +; CHECK-LABEL: even:
> > +; CHECK: stp d15, d14, [sp, #-144]!
> > +; CHECK: stp d13, d12, [sp, #16]
> > +; CHECK: stp d11, d10, [sp, #32]
> > +; CHECK: stp d9, d8, [sp, #48]
> > +; CHECK: stp x28, x27, [sp, #64]
> > +; CHECK: stp x26, x25, [sp, #80]
> > +; CHECK: stp x24, x23, [sp, #96]
> > +; CHECK: stp x22, x21, [sp, #112]
> > +; CHECK: stp x20, x19, [sp, #128]
> > +; CHECK: movz x0, #42
> > +; CHECK: ldp x20, x19, [sp, #128]
> > +; CHECK: ldp x22, x21, [sp, #112]
> > +; CHECK: ldp x24, x23, [sp, #96]
> > +; CHECK: ldp x26, x25, [sp, #80]
> > +; CHECK: ldp x28, x27, [sp, #64]
> > +; CHECK: ldp d9, d8, [sp, #48]
> > +; CHECK: ldp d11, d10, [sp, #32]
> > +; CHECK: ldp d13, d12, [sp, #16]
> > +; CHECK: ldp d15, d14, [sp], #144
> > +  call void asm sideeffect "mov x0, #42",
> "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"()
> nounwind
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/regress-f128csel-flags.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/regress-f128csel-flags.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/regress-f128csel-flags.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/regress-f128csel-flags.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,27 @@
> > +; RUN: llc -march=arm64 -verify-machineinstrs < %s | FileCheck %s
> > +
> > +; We used to not mark NZCV as being used in the continuation basic-block
> > +; when lowering a 128-bit "select" to branches. This meant a subsequent
> use
> > +; of the same flags gave an internal fault here.
> > +
> > +declare void @foo(fp128)
> > +
> > +define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b, double
> %l, double %r) nounwind {
> > +; CHECK: test_f128csel_flags
> > +
> > +    %tst = icmp ne i32 %lhs, 42
> > +    %val = select i1 %tst, fp128 %a, fp128 %b
> > +; CHECK: cmp w0, #42
> > +; CHECK: b.eq {{.?LBB0}}
> > +
> > +    call void @foo(fp128 %val)
> > +    %retval = select i1 %tst, double %l, double %r
> > +
> > +    ; It's also reasonably important that the actual fcsel comes before
> the
> > +    ; function call since bl may corrupt NZCV. We were doing the right
> thing anyway,
> > +    ; but we might as well test it while we're here.
> > +; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne
> > +; CHECK: bl {{_?foo}}
> > +
> > +    ret double %retval
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/return-vector.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/return-vector.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/return-vector.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/return-vector.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +; 2x64 vector should be returned in Q0.
> > +
> > +define <2 x double> @test(<2 x double>* %p) nounwind {
> > +; CHECK: test
> > +; CHECK: ldr q0, [x0]
> > +; CHECK: ret
> > +  %tmp1 = load <2 x double>* %p, align 16
> > +  ret <2 x double> %tmp1
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/returnaddr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/returnaddr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/returnaddr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/returnaddr.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,26 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define i8* @rt0(i32 %x) nounwind readnone {
> > +entry:
> > +; CHECK-LABEL: rt0:
> > +; CHECK: mov x0, lr
> > +; CHECK: ret
> > +  %0 = tail call i8* @llvm.returnaddress(i32 0)
> > +  ret i8* %0
> > +}
> > +
> > +define i8* @rt2() nounwind readnone {
> > +entry:
> > +; CHECK-LABEL: rt2:
> > +; CHECK: stp fp, lr, [sp, #-16]!
> > +; CHECK: mov fp, sp
> > +; CHECK: ldr x[[REG:[0-9]+]], [fp]
> > +; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]]]
> > +; CHECK: ldr x0, [x[[REG2]], #8]
> > +; CHECK: ldp fp, lr, [sp], #16
> > +; CHECK: ret
> > +  %0 = tail call i8* @llvm.returnaddress(i32 2)
> > +  ret i8* %0
> > +}
> > +
> > +declare i8* @llvm.returnaddress(i32) nounwind readnone
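
The rt2 checks follow the AAPCS64 frame-record layout: each record is
{ previous fp, saved lr }, so depth 2 walks the fp chain twice and then reads
the saved lr at offset 8. A by-hand equivalent, assuming %fp already holds
the current frame pointer (only a sketch; llvm.returnaddress is of course the
supported way to do this):

  define i64 @walk_two_frames(i64* %fp) {
  ; [fp]       -> caller's frame record
    %rec1 = load i64* %fp
    %rec1p = inttoptr i64 %rec1 to i64*
  ; [rec1]     -> caller's caller's frame record
    %rec2 = load i64* %rec1p
    %rec2p = inttoptr i64 %rec2 to i64*
  ; [rec2, #8] -> the lr saved in that frame
    %lrslot = getelementptr i64* %rec2p, i64 1
    %lr = load i64* %lrslot
    ret i64 %lr
  }
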
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/rev.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/rev.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/rev.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/rev.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,221 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define i32 @test_rev_w(i32 %a) nounwind {
> > +entry:
> > +; CHECK-LABEL: test_rev_w:
> > +; CHECK: rev w0, w0
> > +  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
> > +  ret i32 %0
> > +}
> > +
> > +define i64 @test_rev_x(i64 %a) nounwind {
> > +entry:
> > +; CHECK-LABEL: test_rev_x:
> > +; CHECK: rev x0, x0
> > +  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
> > +  ret i64 %0
> > +}
> > +
> > +declare i32 @llvm.bswap.i32(i32) nounwind readnone
> > +declare i64 @llvm.bswap.i64(i64) nounwind readnone
> > +
> > +define i32 @test_rev16_w(i32 %X) nounwind {
> > +entry:
> > +; CHECK-LABEL: test_rev16_w:
> > +; CHECK: rev16 w0, w0
> > +  %tmp1 = lshr i32 %X, 8
> > +  %X15 = bitcast i32 %X to i32
> > +  %tmp4 = shl i32 %X15, 8
> > +  %tmp2 = and i32 %tmp1, 16711680
> > +  %tmp5 = and i32 %tmp4, -16777216
> > +  %tmp9 = and i32 %tmp1, 255
> > +  %tmp13 = and i32 %tmp4, 65280
> > +  %tmp6 = or i32 %tmp5, %tmp2
> > +  %tmp10 = or i32 %tmp6, %tmp13
> > +  %tmp14 = or i32 %tmp10, %tmp9
> > +  ret i32 %tmp14
> > +}
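
Decoded, the masks in @test_rev16_w are 16711680 = 0x00FF0000,
-16777216 = 0xFF000000, 255 = 0x000000FF and 65280 = 0x0000FF00, so the whole
expression folds to

  ((X >> 8) & 0x00FF00FF) | ((X << 8) & 0xFF00FF00)

which is a byte swap within each 16-bit half, exactly what rev16 does.
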
> > +
> > +define i64 @test_rev16_x(i64 %a) nounwind {
> > +entry:
> > +; CHECK-LABEL: test_rev16_x:
> > +; CHECK: rev16 x0, x0
> > +  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
> > +  %1 = lshr i64 %0, 16
> > +  %2 = shl i64 %0, 48
> > +  %3 = or i64 %1, %2
> > +  ret i64 %3
> > +}
> > +
> > +define i64 @test_rev32_x(i64 %a) nounwind {
> > +entry:
> > +; CHECK-LABEL: test_rev32_x:
> > +; CHECK: rev32 x0, x0
> > +  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
> > +  %1 = lshr i64 %0, 32
> > +  %2 = shl i64 %0, 32
> > +  %3 = or i64 %1, %2
> > +  ret i64 %3
> > +}
> > +
> > +define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64D8:
> > +;CHECK: rev64.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64D16:
> > +;CHECK: rev64.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x
> i32> <i32 3, i32 2, i32 1, i32 0>
> > +       ret <4 x i16> %tmp2
> > +}
> > +
> > +define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64D32:
> > +;CHECK: rev64.2s
> > +       %tmp1 = load <2 x i32>* %A
> > +       %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x
> i32> <i32 1, i32 0>
> > +       ret <2 x i32> %tmp2
> > +}
> > +
> > +define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64Df:
> > +;CHECK: rev64.2s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x
> i32> <i32 1, i32 0>
> > +       ret <2 x float> %tmp2
> > +}
> > +
> > +define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64Q8:
> > +;CHECK: rev64.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x
> i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32
> 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
> > +       ret <16 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64Q16:
> > +;CHECK: rev64.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x
> i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> > +       ret <8 x i16> %tmp2
> > +}
> > +
> > +define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64Q32:
> > +;CHECK: rev64.4s
> > +       %tmp1 = load <4 x i32>* %A
> > +       %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x
> i32> <i32 1, i32 0, i32 3, i32 2>
> > +       ret <4 x i32> %tmp2
> > +}
> > +
> > +define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64Qf:
> > +;CHECK: rev64.4s
> > +       %tmp1 = load <4 x float>* %A
> > +       %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x
> i32> <i32 1, i32 0, i32 3, i32 2>
> > +       ret <4 x float> %tmp2
> > +}
> > +
> > +define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev32D8:
> > +;CHECK: rev32.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev32D16:
> > +;CHECK: rev32.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x
> i32> <i32 1, i32 0, i32 3, i32 2>
> > +       ret <4 x i16> %tmp2
> > +}
> > +
> > +define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev32Q8:
> > +;CHECK: rev32.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x
> i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32
> 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
> > +       ret <16 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev32Q16:
> > +;CHECK: rev32.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x
> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> > +       ret <8 x i16> %tmp2
> > +}
> > +
> > +define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev16D8:
> > +;CHECK: rev16.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev16Q8:
> > +;CHECK: rev16.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x
> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8,
> i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
> > +       ret <16 x i8> %tmp2
> > +}
> > +
> > +; Undef shuffle indices should not prevent matching to VREV:
> > +
> > +define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev64D8_undef:
> > +;CHECK: rev64.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
> > +       ret <8 x i8> %tmp2
> > +}
> > +
> > +define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: test_vrev32Q16_undef:
> > +;CHECK: rev32.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x
> i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
> > +       ret <8 x i16> %tmp2
> > +}
> > +
> > +; vrev <4 x i16> should use REV32 and not REV64
> > +define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>*
> nocapture %dst) nounwind ssp {
> > +; CHECK-LABEL: test_vrev64:
> > +; CHECK: ldr [[DEST:q[0-9]+]],
> > +; CHECK: st1.h
> > +; CHECK: st1.h
> > +entry:
> > +  %0 = bitcast <4 x i16>* %source to <8 x i16>*
> > +  %tmp2 = load <8 x i16>* %0, align 4
> > +  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
> > +  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
> > +  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
> > +  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
> > +  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
> > +  ret void
> > +}
> > +
> > +; Test vrev of float4
> > +define void @float_vrev64(float* nocapture %source, <4 x float>*
> nocapture %dest) nounwind noinline ssp {
> > +; CHECK: float_vrev64
> > +; CHECK: ldr [[DEST:q[0-9]+]],
> > +; CHECK: rev64.4s
> > +entry:
> > +  %0 = bitcast float* %source to <4 x float>*
> > +  %tmp2 = load <4 x float>* %0, align 4
> > +  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef,
> float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32
> 0, i32 0>
> > +  %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
> > +  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
> > +  ret void
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/rounding.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/rounding.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/rounding.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/rounding.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,208 @@
> > +; RUN: llc -O3 < %s | FileCheck %s
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
> > +target triple = "arm64-apple-ios6.0.0"
> > +
> > +; CHECK: test1
> > +; CHECK: frintx
> > +; CHECK: frintm
> > +define float @test1(float %a) #0 {
> > +entry:
> > +  %call = tail call float @floorf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @floorf(float) nounwind readnone
> > +
> > +; CHECK: test2
> > +; CHECK: frintx
> > +; CHECK: frintm
> > +define double @test2(double %a) #0 {
> > +entry:
> > +  %call = tail call double @floor(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @floor(double) nounwind readnone
> > +
> > +; CHECK: test3
> > +; CHECK: frinti
> > +define float @test3(float %a) #0 {
> > +entry:
> > +  %call = tail call float @nearbyintf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @nearbyintf(float) nounwind readnone
> > +
> > +; CHECK: test4
> > +; CHECK: frinti
> > +define double @test4(double %a) #0 {
> > +entry:
> > +  %call = tail call double @nearbyint(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @nearbyint(double) nounwind readnone
> > +
> > +; CHECK: test5
> > +; CHECK: frintx
> > +; CHECK: frintp
> > +define float @test5(float %a) #0 {
> > +entry:
> > +  %call = tail call float @ceilf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @ceilf(float) nounwind readnone
> > +
> > +; CHECK: test6
> > +; CHECK: frintx
> > +; CHECK: frintp
> > +define double @test6(double %a) #0 {
> > +entry:
> > +  %call = tail call double @ceil(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @ceil(double) nounwind readnone
> > +
> > +; CHECK: test7
> > +; CHECK: frintx
> > +define float @test7(float %a) #0 {
> > +entry:
> > +  %call = tail call float @rintf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @rintf(float) nounwind readnone
> > +
> > +; CHECK: test8
> > +; CHECK: frintx
> > +define double @test8(double %a) #0 {
> > +entry:
> > +  %call = tail call double @rint(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @rint(double) nounwind readnone
> > +
> > +; CHECK: test9
> > +; CHECK: frintx
> > +; CHECK: frintz
> > +define float @test9(float %a) #0 {
> > +entry:
> > +  %call = tail call float @truncf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @truncf(float) nounwind readnone
> > +
> > +; CHECK: test10
> > +; CHECK: frintx
> > +; CHECK: frintz
> > +define double @test10(double %a) #0 {
> > +entry:
> > +  %call = tail call double @trunc(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @trunc(double) nounwind readnone
> > +
> > +; CHECK: test11
> > +; CHECK: frintx
> > +; CHECK: frinta
> > +define float @test11(float %a) #0 {
> > +entry:
> > +  %call = tail call float @roundf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +declare float @roundf(float %a) nounwind readnone
> > +
> > +; CHECK: test12
> > +; CHECK: frintx
> > +; CHECK: frinta
> > +define double @test12(double %a) #0 {
> > +entry:
> > +  %call = tail call double @round(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +declare double @round(double %a) nounwind readnone
> > +
> > +; CHECK: test13
> > +; CHECK-NOT: frintx
> > +; CHECK: frintm
> > +define float @test13(float %a) #1 {
> > +entry:
> > +  %call = tail call float @floorf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +; CHECK: test14
> > +; CHECK-NOT: frintx
> > +; CHECK: frintm
> > +define double @test14(double %a) #1 {
> > +entry:
> > +  %call = tail call double @floor(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +; CHECK: test15
> > +; CHECK-NOT: frintx
> > +; CHECK: frintp
> > +define float @test15(float %a) #1 {
> > +entry:
> > +  %call = tail call float @ceilf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +; CHECK: test16
> > +; CHECK-NOT: frintx
> > +; CHECK: frintp
> > +define double @test16(double %a) #1 {
> > +entry:
> > +  %call = tail call double @ceil(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +; CHECK: test17
> > +; CHECK-NOT: frintx
> > +; CHECK: frintz
> > +define float @test17(float %a) #1 {
> > +entry:
> > +  %call = tail call float @truncf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +; CHECK: test18
> > +; CHECK-NOT: frintx
> > +; CHECK: frintz
> > +define double @test18(double %a) #1 {
> > +entry:
> > +  %call = tail call double @trunc(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +; CHECK: test19
> > +; CHECK-NOT: frintx
> > +; CHECK: frinta
> > +define float @test19(float %a) #1 {
> > +entry:
> > +  %call = tail call float @roundf(float %a) nounwind readnone
> > +  ret float %call
> > +}
> > +
> > +; CHECK: test20
> > +; CHECK-NOT: frintx
> > +; CHECK: frinta
> > +define double @test20(double %a) #1 {
> > +entry:
> > +  %call = tail call double @round(double %a) nounwind readnone
> > +  ret double %call
> > +}
> > +
> > +
> > +
> > +attributes #0 = { nounwind }
> > +attributes #1 = { nounwind "unsafe-fp-math"="true" }
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/scaled_iv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/scaled_iv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/scaled_iv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/scaled_iv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,38 @@
> > +; RUN: opt -S -loop-reduce < %s | FileCheck %s
> > +; Scaling factors in addressing modes are costly.
> > +; Make loop-reduce prefer unscaled accesses.
> > +; <rdar://problem/13806271>
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
> > +target triple = "arm64-apple-ios7.0.0"
> > +
> > +; Function Attrs: nounwind ssp
> > +define void @mulDouble(double* nocapture %a, double* nocapture %b,
> double* nocapture %c) {
> > +; CHECK: @mulDouble
> > +entry:
> > +  br label %for.body
> > +
> > +for.body:                                         ; preds = %for.body,
> %entry
> > +; CHECK: [[IV:%[^ ]+]] = phi i64 [ [[IVNEXT:%[^,]+]], %for.body ], [ 0,
> %entry ]
> > +; Only one induction variable should have been generated.
> > +; CHECK-NOT: phi
> > +  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
> > +  %tmp = add nsw i64 %indvars.iv, -1
> > +  %arrayidx = getelementptr inbounds double* %b, i64 %tmp
> > +  %tmp1 = load double* %arrayidx, align 8
> > +; The induction variable should carry the scaling factor: 1 * 8 = 8.
> > +; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
> > +  %indvars.iv.next = add i64 %indvars.iv, 1
> > +  %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
> > +  %tmp2 = load double* %arrayidx2, align 8
> > +  %mul = fmul double %tmp1, %tmp2
> > +  %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
> > +  store double %mul, double* %arrayidx4, align 8
> > +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> > +; Comparison should be 19 * 8 = 152.
> > +; CHECK: icmp eq i32 {{%[^,]+}}, 152
> > +  %exitcond = icmp eq i32 %lftr.wideiv, 20
> > +  br i1 %exitcond, label %for.end, label %for.body
> > +
> > +for.end:                                          ; preds = %for.body
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/scvt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/scvt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/scvt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/scvt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,830 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +; rdar://13082402
> > +
> > +define float @t1(i32* nocapture %src) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: ldr s0, [x0]
> > +; CHECK: scvtf s0, s0
> > +  %tmp1 = load i32* %src, align 4
> > +  %tmp2 = sitofp i32 %tmp1 to float
> > +  ret float %tmp2
> > +}
> > +
> > +define float @t2(i32* nocapture %src) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: ldr s0, [x0]
> > +; CHECK: ucvtf s0, s0
> > +  %tmp1 = load i32* %src, align 4
> > +  %tmp2 = uitofp i32 %tmp1 to float
> > +  ret float %tmp2
> > +}
> > +
> > +define double @t3(i64* nocapture %src) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK: ldr d0, [x0]
> > +; CHECK: scvtf d0, d0
> > +  %tmp1 = load i64* %src, align 4
> > +  %tmp2 = sitofp i64 %tmp1 to double
> > +  ret double %tmp2
> > +}
> > +
> > +define double @t4(i64* nocapture %src) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: t4:
> > +; CHECK: ldr d0, [x0]
> > +; CHECK: ucvtf d0, d0
> > +  %tmp1 = load i64* %src, align 4
> > +  %tmp2 = uitofp i64 %tmp1 to double
> > +  ret double %tmp2
> > +}
> > +
> > +; rdar://13136456
> > +define double @t5(i32* nocapture %src) nounwind ssp optsize {
> > +entry:
> > +; CHECK-LABEL: t5:
> > +; CHECK: ldr [[REG:w[0-9]+]], [x0]
> > +; CHECK: scvtf d0, [[REG]]
> > +  %tmp1 = load i32* %src, align 4
> > +  %tmp2 = sitofp i32 %tmp1 to double
> > +  ret double %tmp2
> > +}
> > +
> > +; Check that we load into an FP register when we want to convert to a
> > +; floating-point value.
> > +; This is much faster than loading into a GPR and then making the
> > +; conversion GPR -> FPR.
> > +; <rdar://problem/14599607>
> > +;
> > +; Check the following patterns for signed/unsigned:
> > +; 1. load with scaled imm to float.
> > +; 2. load with scaled register to float.
> > +; 3. load with scaled imm to double.
> > +; 4. load with scaled register to double.
> > +; 5. load with unscaled imm to float.
> > +; 6. load with unscaled imm to double.
> > +; With load sizes of 8, 16, 32, and 64 bits.
> > +
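
The fct1..fct16 tests below cover the unsigned side of the pattern groups listed
above: each one loads a small integer directly into a SIMD register (ldr b/h/s/d
with a scaled immediate or register offset) and converts it in place with ucvtf,
instead of loading into a GPR and paying a GPR -> FPR transfer. Roughly the C
shape of these tests (an illustrative reconstruction, not part of the commit;
the function name is made up):

  float load_and_square(const unsigned char *sp0) {
    float v = (float)sp0[1];   /* expected: ldr b..., [x0, #1]; ucvtf; fmul */
    return v * v;
  }
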
> > +; ********* 1. load with scaled imm to float. *********
> > +define float @fct1(i8* nocapture %sp0) {
> > +; CHECK-LABEL: fct1:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct2(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct2:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct3(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct3:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @fct4(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct4:
> > +; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; ********* 2. load with scaled register to float. *********
> > +define float @fct5(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct5:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct6(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct6:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct7(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct7:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @fct8(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct8:
> > +; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +
> > +; ********* 3. load with scaled imm to double. *********
> > +define double @fct9(i8* nocapture %sp0) {
> > +; CHECK-LABEL: fct9:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct10(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct10:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct11(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct11:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct12(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct12:
> > +; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +; ********* 4. load with scaled register to double. *********
> > +define double @fct13(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct13:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct14(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct14:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct15(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct15:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct16(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct16:
> > +; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +; ********* 5. load with unscaled imm to float. *********
> > +define float @fct17(i8* nocapture %sp0) {
> > +entry:
> > +; CHECK-LABEL: fct17:
> > +; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i8* %sp0 to i64
> > +  %add = add i64 %bitcast, -1
> > +  %addr = inttoptr i64 %add to i8*
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct18(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct18:
> > +; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i16* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i16*
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @fct19(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct19:
> > +; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i32* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i32*
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @fct20(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct20:
> > +; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i64* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i64*
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +
> > +}
> > +
> > +; ********* 6. load with unscaled imm to double. *********
> > +define double @fct21(i8* nocapture %sp0) {
> > +entry:
> > +; CHECK-LABEL: fct21:
> > +; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i8* %sp0 to i64
> > +  %add = add i64 %bitcast, -1
> > +  %addr = inttoptr i64 %add to i8*
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = uitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct22(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct22:
> > +; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i16* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i16*
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = uitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct23(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct23:
> > +; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i32* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i32*
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = uitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @fct24(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct24:
> > +; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i64* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i64*
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = uitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +
> > +}
> > +
> > +; ********* 1s. load with scaled imm to float. *********
> > +define float @sfct1(i8* nocapture %sp0) {
> > +; CHECK-LABEL: sfct1:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
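
The signed sfct1..sfct8 variants cannot reuse the plain FP load as-is because the
byte or halfword has to be sign-extended before the convert; the expected code
widens the value inside the SIMD unit with sshll steps rather than bouncing
through a GPR. A minimal C sketch of the same pattern (illustrative only,
hypothetical function name):

  float load_and_square_signed(const signed char *sp0) {
    float v = (float)sp0[1];   /* sitofp: sign extension needed before scvtf */
    return v * v;
  }
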
> > +
> > +define float @sfct2(i16* nocapture %sp0) {
> > +; CHECK-LABEL: sfct2:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @sfct3(i32* nocapture %sp0) {
> > +; CHECK-LABEL: sfct3:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @sfct4(i64* nocapture %sp0) {
> > +; CHECK-LABEL: sfct4:
> > +; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; ********* 2s. load with scaled register to float. *********
> > +define float @sfct5(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct5:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @sfct6(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct6:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @sfct7(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct7:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @sfct8(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct8:
> > +; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; ********* 3s. load with scaled imm to double. *********
> > +define double @sfct9(i8* nocapture %sp0) {
> > +; CHECK-LABEL: sfct9:
> > +; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct10(i16* nocapture %sp0) {
> > +; CHECK-LABEL: sfct10:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct11(i32* nocapture %sp0) {
> > +; CHECK-LABEL: sfct11:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct12(i64* nocapture %sp0) {
> > +; CHECK-LABEL: sfct12:
> > +; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +; ********* 4s. load with scaled register to double. *********
> > +define double @sfct13(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct13:
> > +; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct14(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct14:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct15(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct15:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct16(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: sfct16:
> > +; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +; ********* 5s. load with unscaled imm to float. *********
> > +define float @sfct17(i8* nocapture %sp0) {
> > +entry:
> > +; CHECK-LABEL: sfct17:
> > +; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
> > +; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i8* %sp0 to i64
> > +  %add = add i64 %bitcast, -1
> > +  %addr = inttoptr i64 %add to i8*
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @sfct18(i16* nocapture %sp0) {
> > +; CHECK-LABEL: sfct18:
> > +; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i16* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i16*
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define float @sfct19(i32* nocapture %sp0) {
> > +; CHECK-LABEL: sfct19:
> > +; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i32* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i32*
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +; i64 -> f32 is not supported on the floating-point unit.
> > +define float @sfct20(i64* nocapture %sp0) {
> > +; CHECK-LABEL: sfct20:
> > +; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i64* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i64*
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +
> > +}
> > +
> > +; ********* 6s. load with unscaled imm to double. *********
> > +define double @sfct21(i8* nocapture %sp0) {
> > +entry:
> > +; CHECK-LABEL: sfct21:
> > +; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i8* %sp0 to i64
> > +  %add = add i64 %bitcast, -1
> > +  %addr = inttoptr i64 %add to i8*
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct22(i16* nocapture %sp0) {
> > +; CHECK-LABEL: sfct22:
> > +; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i16* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i16*
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %val = sitofp i16 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct23(i32* nocapture %sp0) {
> > +; CHECK-LABEL: sfct23:
> > +; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
> > +; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i32* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i32*
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
> > +
> > +define double @sfct24(i64* nocapture %sp0) {
> > +; CHECK-LABEL: sfct24:
> > +; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i64* %sp0 to i64
> > +  %add = add i64 %bitcast, 1
> > +  %addr = inttoptr i64 %add to i64*
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %val = sitofp i64 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +
> > +}
> > +
> > +; Check that we do not use the SSHLL code sequence when code size is a concern.
> > +define float @codesize_sfct17(i8* nocapture %sp0) optsize {
> > +entry:
> > +; CHECK-LABEL: codesize_sfct17:
> > +; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
> > +; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
> > +; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
> > +  %bitcast = ptrtoint i8* %sp0 to i64
> > +  %add = add i64 %bitcast, -1
> > +  %addr = inttoptr i64 %add to i8*
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %val = sitofp i8 %pix_sp0.0.copyload to float
> > +  %vmull.i = fmul float %val, %val
> > +  ret float %vmull.i
> > +}
> > +
> > +define double @codesize_sfct11(i32* nocapture %sp0) minsize {
> > +; CHECK-LABEL: codesize_sfct11:
> > +; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
> > +; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %val = sitofp i32 %pix_sp0.0.copyload to double
> > +  %vmull.i = fmul double %val, %val
> > +  ret double %vmull.i
> > +}
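
The two codesize_* functions above check that the multi-instruction sshll
expansion is skipped when the function is optimized for size; a plain ldrsb/ldr
into a GPR followed by scvtf from the GPR is shorter. Roughly the C shape,
assuming Clang's minsize attribute (illustrative reconstruction, hypothetical
name):

  __attribute__((minsize))
  double load_and_square_small(const int *sp0) {
    double v = (double)sp0[1];  /* expected: ldr w..., [x0, #4]; scvtf d..., w... */
    return v * v;
  }
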
> > +
> > +; Adding fp128 custom lowering makes these a little fragile since we
> have to
> > +; return the correct mix of Legal/Expand from the custom method.
> > +;
> > +; rdar://problem/14991489
> > +
> > +define float @float_from_i128(i128 %in) {
> > +; CHECK-LABEL: float_from_i128:
> > +; CHECK: bl {{_?__floatuntisf}}
> > +  %conv = uitofp i128 %in to float
> > +  ret float %conv
> > +}
> > +
> > +define double @double_from_i128(i128 %in) {
> > +; CHECK-LABEL: double_from_i128:
> > +; CHECK: bl {{_?__floattidf}}
> > +  %conv = sitofp i128 %in to double
> > +  ret double %conv
> > +}
> > +
> > +define fp128 @fp128_from_i128(i128 %in) {
> > +; CHECK-LABEL: fp128_from_i128:
> > +; CHECK: bl {{_?__floatuntitf}}
> > +  %conv = uitofp i128 %in to fp128
> > +  ret fp128 %conv
> > +}
> > +
> > +define i128 @i128_from_float(float %in) {
> > +; CHECK-LABEL: i128_from_float
> > +; CHECK: bl {{_?__fixsfti}}
> > +  %conv = fptosi float %in to i128
> > +  ret i128 %conv
> > +}
> > +
> > +define i128 @i128_from_double(double %in) {
> > +; CHECK-LABEL: i128_from_double
> > +; CHECK: bl {{_?__fixunsdfti}}
> > +  %conv = fptoui double %in to i128
> > +  ret i128 %conv
> > +}
> > +
> > +define i128 @i128_from_fp128(fp128 %in) {
> > +; CHECK-LABEL: i128_from_fp128
> > +; CHECK: bl {{_?__fixtfti}}
> > +  %conv = fptosi fp128 %in to i128
> > +  ret i128 %conv
> > +}
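
There are no ARM64 instructions for i128 <-> floating-point conversions, so these
are lowered to compiler-rt/libgcc calls (__floatuntisf, __floattidf, __fixsfti,
and so on), which is what the bl checks above verify. In C terms (illustrative,
assuming a compiler with __int128 support):

  float f_from_u128(unsigned __int128 x) { return (float)x; }     /* bl __floatuntisf */
  __int128 i128_from_f(float f)          { return (__int128)f; }  /* bl __fixsfti */
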
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/shifted-sext.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/shifted-sext.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/shifted-sext.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/shifted-sext.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,277 @@
> > +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
> > +;
> > +; <rdar://problem/13820218>
> > +
> > +define signext i16 @extendedLeftShiftcharToshortBy4(i8 signext %a)
> nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharToshortBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #28, #7
> > +  %inc = add i8 %a, 1
> > +  %conv1 = sext i8 %inc to i32
> > +  %shl = shl nsw i32 %conv1, 4
> > +  %conv2 = trunc i32 %shl to i16
> > +  ret i16 %conv2
> > +}
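
These tests check that a sign extension followed by a constant shift is folded
into a single SBFM (signed bit-field move). Rough C equivalent of the function
above (reconstructed for illustration, hypothetical name):

  short shift_char_to_short_by4(signed char a) {
    signed char inc = (signed char)(a + 1);
    return (short)(inc << 4);   /* sext i8 + shl 4 folds into one sbfm */
  }
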
> > +
> > +define signext i16 @extendedRightShiftcharToshortBy4(i8 signext %a)
> nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharToshortBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #4, #7
> > +  %inc = add i8 %a, 1
> > +  %conv1 = sext i8 %inc to i32
> > +  %shr4 = lshr i32 %conv1, 4
> > +  %conv2 = trunc i32 %shr4 to i16
> > +  ret i16 %conv2
> > +}
> > +
> > +define signext i16 @extendedLeftShiftcharToshortBy8(i8 signext %a)
> nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharToshortBy8:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #24, #7
> > +  %inc = add i8 %a, 1
> > +  %conv1 = sext i8 %inc to i32
> > +  %shl = shl nsw i32 %conv1, 8
> > +  %conv2 = trunc i32 %shl to i16
> > +  ret i16 %conv2
> > +}
> > +
> > +define signext i16 @extendedRightShiftcharToshortBy8(i8 signext %a)
> nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharToshortBy8:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sxtb [[REG]], [[REG]]
> > +; CHECK: asr w0, [[REG]], #8
> > +  %inc = add i8 %a, 1
> > +  %conv1 = sext i8 %inc to i32
> > +  %shr4 = lshr i32 %conv1, 8
> > +  %conv2 = trunc i32 %shr4 to i16
> > +  ret i16 %conv2
> > +}
> > +
> > +define i32 @extendedLeftShiftcharTointBy4(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharTointBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #28, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i32
> > +  %shl = shl nsw i32 %conv, 4
> > +  ret i32 %shl
> > +}
> > +
> > +define i32 @extendedRightShiftcharTointBy4(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharTointBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #4, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i32
> > +  %shr = ashr i32 %conv, 4
> > +  ret i32 %shr
> > +}
> > +
> > +define i32 @extendedLeftShiftcharTointBy8(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharTointBy8:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #24, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i32
> > +  %shl = shl nsw i32 %conv, 8
> > +  ret i32 %shl
> > +}
> > +
> > +define i32 @extendedRightShiftcharTointBy8(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharTointBy8:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sxtb [[REG]], [[REG]]
> > +; CHECK: asr w0, [[REG]], #8
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i32
> > +  %shr = ashr i32 %conv, 8
> > +  ret i32 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftcharToint64By4(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #60, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i64
> > +  %shl = shl nsw i64 %conv, 4
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftcharToint64By4(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #4, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i64
> > +  %shr = ashr i64 %conv, 4
> > +  ret i64 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftcharToint64By8(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftcharToint64By8:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #56, #7
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i64
> > +  %shl = shl nsw i64 %conv, 8
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftcharToint64By8(i8 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftcharToint64By8:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sxtb x[[REG]], x[[REG]]
> > +; CHECK: asr x0, x[[REG]], #8
> > +  %inc = add i8 %a, 1
> > +  %conv = sext i8 %inc to i64
> > +  %shr = ashr i64 %conv, 8
> > +  ret i64 %shr
> > +}
> > +
> > +define i32 @extendedLeftShiftshortTointBy4(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftshortTointBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #28, #15
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i32
> > +  %shl = shl nsw i32 %conv, 4
> > +  ret i32 %shl
> > +}
> > +
> > +define i32 @extendedRightShiftshortTointBy4(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftshortTointBy4:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sbfm w0, [[REG]], #4, #15
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i32
> > +  %shr = ashr i32 %conv, 4
> > +  ret i32 %shr
> > +}
> > +
> > +define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftshortTointBy16:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: lsl w0, [[REG]], #16
> > +  %inc = add i16 %a, 1
> > +  %conv2 = zext i16 %inc to i32
> > +  %shl = shl nuw i32 %conv2, 16
> > +  ret i32 %shl
> > +}
> > +
> > +define i32 @extendedRightShiftshortTointBy16(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftshortTointBy16:
> > +; CHECK: add [[REG:w[0-9]+]], w0, #1
> > +; CHECK: sxth [[REG]], [[REG]]
> > +; CHECK: asr w0, [[REG]], #16
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i32
> > +  %shr = ashr i32 %conv, 16
> > +  ret i32 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftshortToint64By4(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftshortToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #60, #15
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i64
> > +  %shl = shl nsw i64 %conv, 4
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftshortToint64By4(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftshortToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #4, #15
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i64
> > +  %shr = ashr i64 %conv, 4
> > +  ret i64 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftshortToint64By16(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftshortToint64By16:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #48, #15
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i64
> > +  %shl = shl nsw i64 %conv, 16
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftshortToint64By16(i16 signext %a) nounwind
> readnone ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftshortToint64By16:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sxth x[[REG]], x[[REG]]
> > +; CHECK: asr x0, x[[REG]], #16
> > +  %inc = add i16 %a, 1
> > +  %conv = sext i16 %inc to i64
> > +  %shr = ashr i64 %conv, 16
> > +  ret i64 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftintToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #60, #31
> > +  %inc = add nsw i32 %a, 1
> > +  %conv = sext i32 %inc to i64
> > +  %shl = shl nsw i64 %conv, 4
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftintToint64By4(i32 %a) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftintToint64By4:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sbfm x0, x[[REG]], #4, #31
> > +  %inc = add nsw i32 %a, 1
> > +  %conv = sext i32 %inc to i64
> > +  %shr = ashr i64 %conv, 4
> > +  ret i64 %shr
> > +}
> > +
> > +define i64 @extendedLeftShiftintToint64By32(i32 %a) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: extendedLeftShiftintToint64By32:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: lsl x0, x[[REG]], #32
> > +  %inc = add nsw i32 %a, 1
> > +  %conv2 = zext i32 %inc to i64
> > +  %shl = shl nuw i64 %conv2, 32
> > +  ret i64 %shl
> > +}
> > +
> > +define i64 @extendedRightShiftintToint64By32(i32 %a) nounwind readnone
> ssp {
> > +entry:
> > +; CHECK-LABEL: extendedRightShiftintToint64By32:
> > +; CHECK: add w[[REG:[0-9]+]], w0, #1
> > +; CHECK: sxtw x[[REG]], x[[REG]]
> > +; CHECK: asr x0, x[[REG]], #32
> > +  %inc = add nsw i32 %a, 1
> > +  %conv = sext i32 %inc to i64
> > +  %shr = ashr i64 %conv, 32
> > +  ret i64 %shr
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/simd-scalar-to-vector.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/simd-scalar-to-vector.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/simd-scalar-to-vector.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/simd-scalar-to-vector.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp {
> > +; CHECK: uaddlv.16b    h0, v0
> > +; CHECK: rshrn.8b      v0, v0, #4
> > +; CHECK: dup.16b       v0, v0[0]
> > +; CHECK: ret
> > +  %tmp = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
> nounwind
> > +  %tmp1 = trunc i32 %tmp to i16
> > +  %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0
> > +  %tmp3 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>
> %tmp2, i32 4)
> > +  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32>
> zeroinitializer
> > +  ret <16 x i8> %tmp4
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind
> readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/simplest-elf.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/simplest-elf.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/simplest-elf.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/simplest-elf.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,18 @@
> > +; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj < %s | llvm-objdump -
> -r -d --triple=arm64-linux-gnu | FileCheck --check-prefix=CHECK-ELF %s
> > +
> > +define void @foo() nounwind {
> > +  ret void
> > +}
> > +
> > +  ; Check source looks ELF-like: no leading underscore, comments with //
> > +; CHECK: foo: // @foo
> > +; CHECK:     ret
> > +
> > +  ; Similarly make sure ELF output works and is vaguely sane: aarch64
> target
> > +  ; machine with correct section & symbol names.
> > +; CHECK-ELF: file format ELF64-aarch64
> > +
> > +; CHECK-ELF: Disassembly of section .text
> > +; CHECK-ELF-LABEL: foo:
> > +; CHECK-ELF:    ret
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/sincos.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/sincos.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/sincos.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/sincos.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,31 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7 | FileCheck %s
> > +
> > +; Combine sin / cos into a single call.
> > +; rdar://12856873
> > +
> > +define float @test1(float %x) nounwind {
> > +entry:
> > +; CHECK-LABEL: test1:
> > +; CHECK: bl ___sincosf_stret
> > +; CHECK: fadd s0, s0, s1
> > +  %call = tail call float @sinf(float %x) nounwind readnone
> > +  %call1 = tail call float @cosf(float %x) nounwind readnone
> > +  %add = fadd float %call, %call1
> > +  ret float %add
> > +}
> > +
> > +define double @test2(double %x) nounwind {
> > +entry:
> > +; CHECK-LABEL: test2:
> > +; CHECK: bl ___sincos_stret
> > +; CHECK: fadd d0, d0, d1
> > +  %call = tail call double @sin(double %x) nounwind readnone
> > +  %call1 = tail call double @cos(double %x) nounwind readnone
> > +  %add = fadd double %call, %call1
> > +  ret double %add
> > +}
> > +
> > +declare float  @sinf(float) readonly
> > +declare double @sin(double) readonly
> > +declare float @cosf(float) readonly
> > +declare double @cos(double) readonly
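
On Darwin, a sinf call and a cosf call on the same argument (and likewise
sin/cos) are merged into a single __sincosf_stret / __sincos_stret call that
returns both results, and the fadd then combines the two returned values. The C
shape, assuming the math calls can be treated as side-effect-free (e.g. built
with -fno-math-errno):

  #include <math.h>

  float sin_plus_cos(float x) {
    return sinf(x) + cosf(x);   /* expected: one bl ___sincosf_stret, then fadd */
  }
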
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/sitofp-combine-chains.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/sitofp-combine-chains.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/sitofp-combine-chains.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/sitofp-combine-chains.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,22 @@
> > +; RUN: llc -march=arm64 -o -  %s | FileCheck %s
> > +
> > +; ARM64ISelLowering.cpp was creating a new (floating-point) load for
> efficiency
> > +; but not updating chain-successors of the old one. As a result, the
> two memory
> > +; operations in this function both ended up direct successors to the
> EntryToken
> > +; and could be reordered.
> > +
> > +@var = global i32 0, align 4
> > +
> > +define float @foo() {
> > +; CHECK-LABEL: foo:
> > +  ; Load must come before we clobber @var
> > +; CHECK: adrp x[[VARBASE:[0-9]+]], {{_?var}}
> > +; CHECK: ldr [[SREG:s[0-9]+]], [x[[VARBASE]],
> > +; CHECK: str wzr, [x[[VARBASE]],
> > +
> > +  %val = load i32* @var, align 4
> > +  store i32 0, i32* @var, align 4
> > +
> > +  %fltval = sitofp i32 %val to float
> > +  ret float %fltval
> > +}
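
This guards a chain-update bug: when the backend replaces the integer load with a
floating-point load for the sitofp, the new load has to inherit the old load's
chain uses, otherwise the load and the store of 0 end up with no ordering edge
and can be swapped. Roughly the C pattern (hypothetical reconstruction):

  int var;

  float read_then_clear(void) {
    int val = var;   /* this load must stay ahead of the store below */
    var = 0;
    return (float)val;
  }
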
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/sli-sri-opt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/sli-sri-opt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/sli-sri-opt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/sli-sri-opt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,41 @@
> > +; RUN: llc -arm64-shift-insert-generation=true -march=arm64
> -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>*
> %dest) nounwind {
> > +; CHECK-LABEL: testLeftGood:
> > +; CHECK: sli.16b v0, v1, #3
> > +  %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8
> 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8
> 252, i8 252, i8 252>
> > +  %vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
> i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
> > +  %result = or <16 x i8> %and.i, %vshl_n
> > +  store <16 x i8> %result, <16 x i8>* %dest, align 16
> > +  ret void
> > +}
> > +
> > +define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>*
> %dest) nounwind {
> > +; CHECK-LABEL: testLeftBad:
> > +; CHECK-NOT: sli
> > +  %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8
> 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8
> 165, i8 165, i8 165>
> > +  %vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
> i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
> > +  %result = or <16 x i8> %and.i, %vshl_n
> > +  store <16 x i8> %result, <16 x i8>* %dest, align 16
> > +  ret void
> > +}
> > +
> > +define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>*
> %dest) nounwind {
> > +; CHECK-LABEL: testRightGood:
> > +; CHECK: sri.16b v0, v1, #3
> > +  %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8
> 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8
> 252, i8 252, i8 252>
> > +  %vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
> i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
> > +  %result = or <16 x i8> %and.i, %vshl_n
> > +  store <16 x i8> %result, <16 x i8>* %dest, align 16
> > +  ret void
> > +}
> > +
> > +define void @testRightBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>*
> %dest) nounwind {
> > +; CHECK-LABEL: testRightBad:
> > +; CHECK-NOT: sri
> > +  %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8
> 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8
> 165, i8 165, i8 165>
> > +  %vshl_n = lshr <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
> i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
> > +  %result = or <16 x i8> %and.i, %vshl_n
> > +  store <16 x i8> %result, <16 x i8>* %dest, align 16
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/smaxv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/smaxv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/smaxv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/smaxv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,74 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
> > +; CHECK: test_vmaxv_s8
> > +; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
> > +  %0 = trunc i32 %vmaxv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vmaxv_s16(<4 x i16> %a1) {
> > +; CHECK: test_vmaxv_s16
> > +; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>
> %a1)
> > +  %0 = trunc i32 %vmaxv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vmaxv_s32(<2 x i32> %a1) {
> > +; CHECK: test_vmaxv_s32
> > +; 2 x i32 is not supported by the ISA; thus, this is a special case
> > +; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v0, v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32>
> %a1)
> > +  ret i32 %vmaxv.i
> > +}
> > +
> > +define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) {
> > +; CHECK: test_vmaxvq_s8
> > +; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>
> %a1)
> > +  %0 = trunc i32 %vmaxv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) {
> > +; CHECK: test_vmaxvq_s16
> > +; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>
> %a1)
> > +  %0 = trunc i32 %vmaxv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vmaxvq_s32(<4 x i32> %a1) {
> > +; CHECK: test_vmaxvq_s32
> > +; CHECK: smaxv.4s [[REGNUM:s[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>
> %a1)
> > +  ret i32 %vmaxv.i
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>)
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>)
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>)
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32>)
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>)
> > +declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>)
> > +
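
The across-vector signed-max reductions map onto single smaxv instructions,
except for 2 x i32, which the ISA does not provide and which is emulated with a
pairwise smaxp. With an AArch64 arm_neon.h the same reductions would typically be
reached through intrinsics along these lines (intrinsic availability assumed, for
illustration):

  #include <arm_neon.h>

  int8_t  max_across_8b(int8x8_t v)  { return vmaxv_s8(v);  }  /* smaxv.8b */
  int32_t max_across_2s(int32x2_t v) { return vmaxv_s32(v); }  /* smaxp.2s + fmov */
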
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/sminv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/sminv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/sminv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/sminv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,74 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define signext i8 @test_vminv_s8(<8 x i8> %a1) {
> > +; CHECK: test_vminv_s8
> > +; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a1)
> > +  %0 = trunc i32 %vminv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vminv_s16(<4 x i16> %a1) {
> > +; CHECK: test_vminv_s16
> > +; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>
> %a1)
> > +  %0 = trunc i32 %vminv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vminv_s32(<2 x i32> %a1) {
> > +; CHECK: test_vminv_s32
> > +; 2 x i32 is not supported by the ISA; thus, this is a special case
> > +; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32>
> %a1)
> > +  ret i32 %vminv.i
> > +}
> > +
> > +define signext i8 @test_vminvq_s8(<16 x i8> %a1) {
> > +; CHECK: test_vminvq_s8
> > +; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>
> %a1)
> > +  %0 = trunc i32 %vminv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vminvq_s16(<8 x i16> %a1) {
> > +; CHECK: test_vminvq_s16
> > +; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>
> %a1)
> > +  %0 = trunc i32 %vminv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vminvq_s32(<4 x i32> %a1) {
> > +; CHECK: test_vminvq_s32
> > +; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>
> %a1)
> > +  ret i32 %vminv.i
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>)
> > +declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>)
> > +declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>)
> > +declare i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32>)
> > +declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>)
> > +declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>)
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/spill-lr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/spill-lr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/spill-lr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/spill-lr.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,74 @@
> > +; RUN: llc -mtriple=arm64-apple-ios < %s
> > +@bar = common global i32 0, align 4
> > +
> > +; Leaf function which uses all callee-saved registers and allocates >= 256 bytes on the stack;
> > +; this will cause processFunctionBeforeCalleeSavedScan() to spill LR as an additional scratch
> > +; register.
> > +;
> > +; This is a crash-only regression test for rdar://15124582.
> > +define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g,
> i32 %h) nounwind {
> > +entry:
> > +  %stack = alloca [128 x i32], align 4
> > +  %0 = bitcast [128 x i32]* %stack to i8*
> > +  %idxprom = sext i32 %a to i64
> > +  %arrayidx = getelementptr inbounds [128 x i32]* %stack, i64 0, i64
> %idxprom
> > +  store i32 %b, i32* %arrayidx, align 4
> > +  %1 = load volatile i32* @bar, align 4
> > +  %2 = load volatile i32* @bar, align 4
> > +  %3 = load volatile i32* @bar, align 4
> > +  %4 = load volatile i32* @bar, align 4
> > +  %5 = load volatile i32* @bar, align 4
> > +  %6 = load volatile i32* @bar, align 4
> > +  %7 = load volatile i32* @bar, align 4
> > +  %8 = load volatile i32* @bar, align 4
> > +  %9 = load volatile i32* @bar, align 4
> > +  %10 = load volatile i32* @bar, align 4
> > +  %11 = load volatile i32* @bar, align 4
> > +  %12 = load volatile i32* @bar, align 4
> > +  %13 = load volatile i32* @bar, align 4
> > +  %14 = load volatile i32* @bar, align 4
> > +  %15 = load volatile i32* @bar, align 4
> > +  %16 = load volatile i32* @bar, align 4
> > +  %17 = load volatile i32* @bar, align 4
> > +  %18 = load volatile i32* @bar, align 4
> > +  %19 = load volatile i32* @bar, align 4
> > +  %20 = load volatile i32* @bar, align 4
> > +  %idxprom1 = sext i32 %c to i64
> > +  %arrayidx2 = getelementptr inbounds [128 x i32]* %stack, i64 0, i64
> %idxprom1
> > +  %21 = load i32* %arrayidx2, align 4
> > +  %factor = mul i32 %h, -2
> > +  %factor67 = mul i32 %g, -2
> > +  %factor68 = mul i32 %f, -2
> > +  %factor69 = mul i32 %e, -2
> > +  %factor70 = mul i32 %d, -2
> > +  %factor71 = mul i32 %c, -2
> > +  %factor72 = mul i32 %b, -2
> > +  %sum = add i32 %2, %1
> > +  %sum73 = add i32 %sum, %3
> > +  %sum74 = add i32 %sum73, %4
> > +  %sum75 = add i32 %sum74, %5
> > +  %sum76 = add i32 %sum75, %6
> > +  %sum77 = add i32 %sum76, %7
> > +  %sum78 = add i32 %sum77, %8
> > +  %sum79 = add i32 %sum78, %9
> > +  %sum80 = add i32 %sum79, %10
> > +  %sum81 = add i32 %sum80, %11
> > +  %sum82 = add i32 %sum81, %12
> > +  %sum83 = add i32 %sum82, %13
> > +  %sum84 = add i32 %sum83, %14
> > +  %sum85 = add i32 %sum84, %15
> > +  %sum86 = add i32 %sum85, %16
> > +  %sum87 = add i32 %sum86, %17
> > +  %sum88 = add i32 %sum87, %18
> > +  %sum89 = add i32 %sum88, %19
> > +  %sum90 = add i32 %sum89, %20
> > +  %sub15 = sub i32 %21, %sum90
> > +  %sub16 = add i32 %sub15, %factor
> > +  %sub17 = add i32 %sub16, %factor67
> > +  %sub18 = add i32 %sub17, %factor68
> > +  %sub19 = add i32 %sub18, %factor69
> > +  %sub20 = add i32 %sub19, %factor70
> > +  %sub21 = add i32 %sub20, %factor71
> > +  %add = add i32 %sub21, %factor72
> > +  ret i32 %add
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/spill.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/spill.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/spill.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/spill.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,15 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -verify-machineinstrs | FileCheck %s
> > +
> > +; CHECK: fpr128
> > +; CHECK: ld1.2d
> > +; CHECK: str q
> > +; CHECK: inlineasm
> > +; CHECK: ldr q
> > +; CHECK: st1.2d
> > +define void @fpr128(<4 x float>* %p) nounwind ssp {
> > +entry:
> > +  %x = load <4 x float>* %p, align 16
> > +  call void asm sideeffect "; inlineasm",
> "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"()
> nounwind
> > +  store <4 x float> %x, <4 x float>* %p, align 16
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/st1.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/st1.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/st1.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/st1.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,628 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -verify-machineinstrs | FileCheck %s
> > +
> > +define void @st1lane_16b(<16 x i8> %A, i8* %D) {
> > +; CHECK: st1lane_16b
> > +; CHECK: st1.b
> > +  %tmp = extractelement <16 x i8> %A, i32 1
> > +  store i8 %tmp, i8* %D
> > +  ret void
> > +}
> > +
> > +define void @st1lane_8h(<8 x i16> %A, i16* %D) {
> > +; CHECK: st1lane_8h
> > +; CHECK: st1.h
> > +  %tmp = extractelement <8 x i16> %A, i32 1
> > +  store i16 %tmp, i16* %D
> > +  ret void
> > +}
> > +
> > +define void @st1lane_4s(<4 x i32> %A, i32* %D) {
> > +; CHECK: st1lane_4s
> > +; CHECK: st1.s
> > +  %tmp = extractelement <4 x i32> %A, i32 1
> > +  store i32 %tmp, i32* %D
> > +  ret void
> > +}
> > +
> > +define void @st1lane_2d(<2 x i64> %A, i64* %D) {
> > +; CHECK: st1lane_2d
> > +; CHECK: st1.d
> > +  %tmp = extractelement <2 x i64> %A, i32 1
> > +  store i64 %tmp, i64* %D
> > +  ret void
> > +}
> > +
> > +define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
> > +; CHECK: st2lane_16b
> > +; CHECK: st2.b
> > +  call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, i64 1, i8* %D)
> > +  ret void
> > +}
> > +
> > +define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
> > +; CHECK: st2lane_8h
> > +; CHECK: st2.h
> > +  call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, i64 1, i16* %D)
> > +  ret void
> > +}
> > +
> > +define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
> > +; CHECK: st2lane_4s
> > +; CHECK: st2.s
> > +  call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, i64 1, i32* %D)
> > +  ret void
> > +}
> > +
> > +define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
> > +; CHECK: st2lane_2d
> > +; CHECK: st2.d
> > +  call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, i64 1, i64* %D)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>,
> i64, i8*) nounwind readnone
> > +declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>,
> i64, i16*) nounwind readnone
> > +declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>,
> i64, i32*) nounwind readnone
> > +declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>,
> i64, i64*) nounwind readnone
> > +
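For readers unfamiliar with these intrinsics: the st2lane/st3lane/st4lane calls in this file
correspond to the ACLE lane-store intrinsics from arm_neon.h. A minimal, hypothetical C sketch
(the function name and the clang/arm64 setup are my assumptions, not part of the patch) that is
normally lowered through llvm.arm64.neon.st2lane and selects the st2.b lane store checked above:

  #include <arm_neon.h>

  /* Store element 1 of each of the two 16 x i8 vectors in v to p.
     On an arm64 target this is expected to become an "st2.b" lane store. */
  void store_lane1(int8_t *p, int8x16x2_t v) {
    vst2q_lane_s8(p, v, 1);
  }
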
> > +define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8*
> %D) {
> > +; CHECK: st3lane_16b
> > +; CHECK: st3.b
> > +  call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, <16 x i8> %C, i64 1, i8* %D)
> > +  ret void
> > +}
> > +
> > +define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16*
> %D) {
> > +; CHECK: st3lane_8h
> > +; CHECK: st3.h
> > +  call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, <8 x i16> %C, i64 1, i16* %D)
> > +  ret void
> > +}
> > +
> > +define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32*
> %D) {
> > +; CHECK: st3lane_4s
> > +; CHECK: st3.s
> > +  call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, <4 x i32> %C, i64 1, i32* %D)
> > +  ret void
> > +}
> > +
> > +define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64*
> %D) {
> > +; CHECK: st3lane_2d
> > +; CHECK: st3.d
> > +  call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, <2 x i64> %C, i64 1, i64* %D)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>,
> <16 x i8>, i64, i8*) nounwind readnone
> > +declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>,
> <8 x i16>, i64, i16*) nounwind readnone
> > +declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>,
> <4 x i32>, i64, i32*) nounwind readnone
> > +declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>,
> <2 x i64>, i64, i64*) nounwind readnone
> > +
> > +define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16
> x i8> %D, i8* %E) {
> > +; CHECK: st4lane_16b
> > +; CHECK: st4.b
> > +  call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
> > +  ret void
> > +}
> > +
> > +define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x
> i16> %D, i16* %E) {
> > +; CHECK: st4lane_8h
> > +; CHECK: st4.h
> > +  call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
> > +  ret void
> > +}
> > +
> > +define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x
> i32> %D, i32* %E) {
> > +; CHECK: st4lane_4s
> > +; CHECK: st4.s
> > +  call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
> > +  ret void
> > +}
> > +
> > +define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x
> i64> %D, i64* %E) {
> > +; CHECK: st4lane_2d
> > +; CHECK: st4.d
> > +  call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>,
> <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
> > +declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>,
> <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
> > +declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>,
> <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
> > +declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>,
> <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
> > +
> > +
> > +define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
> > +; CHECK: st2_8b
> > +; CHECK: st2.8b
> > +       call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8>
> %B, i8* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
> nounwind {
> > +; CHECK: st3_8b
> > +; CHECK: st3.8b
> > +       call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8>
> %B, <8 x i8> %C, i8* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
> i8* %P) nounwind {
> > +; CHECK: st4_8b
> > +; CHECK: st4.8b
> > +       call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8>
> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
> nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x
> i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x
> i8>, <8 x i8>, i8*) nounwind readonly
> > +
> > +define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
> > +; CHECK: st2_16b
> > +; CHECK: st2.16b
> > +       call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x
> i8> %B, i8* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
> nounwind {
> > +; CHECK: st3_16b
> > +; CHECK: st3.16b
> > +       call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, i8* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x
> i8> %D, i8* %P) nounwind {
> > +; CHECK: st4_16b
> > +; CHECK: st4.16b
> > +       call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
> nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16
> x i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>, i8*) nounwind readonly
> > +
> > +define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
> > +; CHECK: st2_4h
> > +; CHECK: st2.4h
> > +       call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x
> i16> %B, i16* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
> nounwind {
> > +; CHECK: st3_4h
> > +; CHECK: st3.4h
> > +       call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x
> i16> %B, <4 x i16> %C, i16* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16>
> %D, i16* %P) nounwind {
> > +; CHECK: st4_4h
> > +; CHECK: st4.4h
> > +       call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x
> i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>,
> i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4
> x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4
> x i16>, <4 x i16>, i16*) nounwind readonly
> > +
> > +define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
> > +; CHECK: st2_8h
> > +; CHECK: st2.8h
> > +       call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, i16* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
> nounwind {
> > +; CHECK: st3_8h
> > +; CHECK: st3.8h
> > +       call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, <8 x i16> %C, i16* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16>
> %D, i16* %P) nounwind {
> > +; CHECK: st4_8h
> > +; CHECK: st4.8h
> > +       call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x
> i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>,
> i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8
> x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8
> x i16>, <8 x i16>, i16*) nounwind readonly
> > +
> > +define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
> > +; CHECK: st2_2s
> > +; CHECK: st2.2s
> > +       call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x
> i32> %B, i32* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
> nounwind {
> > +; CHECK: st3_2s
> > +; CHECK: st3.2s
> > +       call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x
> i32> %B, <2 x i32> %C, i32* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32>
> %D, i32* %P) nounwind {
> > +; CHECK: st4_2s
> > +; CHECK: st4.2s
> > +       call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x
> i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>,
> i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2
> x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2
> x i32>, <2 x i32>, i32*) nounwind readonly
> > +
> > +define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
> > +; CHECK: st2_4s
> > +; CHECK: st2.4s
> > +       call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, i32* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
> nounwind {
> > +; CHECK: st3_4s
> > +; CHECK: st3.4s
> > +       call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, <4 x i32> %C, i32* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32>
> %D, i32* %P) nounwind {
> > +; CHECK: st4_4s
> > +; CHECK: st4.4s
> > +       call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x
> i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>,
> i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4
> x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4
> x i32>, <4 x i32>, i32*) nounwind readonly
> > +
> > +define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
> > +; CHECK: st2_1d
> > +; CHECK st1.2d
> > +       call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x
> i64> %B, i64* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
> nounwind {
> > +; CHECK: st3_1d
> > +; CHECK st1.3d
> > +       call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x
> i64> %B, <1 x i64> %C, i64* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64>
> %D, i64* %P) nounwind {
> > +; CHECK: st4_1d
> > +; CHECK st1.4d
> > +       call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x
> i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>,
> i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1
> x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1
> x i64>, <1 x i64>, i64*) nounwind readonly
> > +
> > +define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
> > +; CHECK: st2_2d
> > +; CHECK: st2.2d
> > +       call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, i64* %P)
> > +       ret void
> > +}
> > +
> > +define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
> nounwind {
> > +; CHECK: st3_2d
> > +; CHECK: st3.2d
> > +       call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, <2 x i64> %C, i64* %P)
> > +       ret void
> > +}
> > +
> > +define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
> %D, i64* %P) nounwind {
> > +; CHECK: st4_2d
> > +; CHECK: st4.2d
> > +       call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x
> i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
> > +       ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>,
> i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2
> x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2
> x i64>, <2 x i64>, i64*) nounwind readonly
> > +
> > +declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
> nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>,
> i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>,
> i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x
> float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>,
> i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x
> double>, double*) nounwind readonly
> > +
> > +define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
> > +; CHECK-LABEL: st1_x2_v8i8:
> > +; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B,
> i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
> > +; CHECK-LABEL: st1_x2_v4i16:
> > +; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16>
> %B, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
> > +; CHECK-LABEL: st1_x2_v2i32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32>
> %B, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr)
> {
> > +; CHECK-LABEL: st1_x2_v2f32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x
> float> %B, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
> > +; CHECK-LABEL: st1_x2_v1i64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64>
> %B, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double*
> %addr) {
> > +; CHECK-LABEL: st1_x2_v1f64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x
> double> %B, double* %addr)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>,
> i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>,
> i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>,
> i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x
> float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>,
> i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x
> double>, double*) nounwind readonly
> > +
> > +define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
> > +; CHECK-LABEL: st1_x2_v16i8:
> > +; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
> > +; CHECK-LABEL: st1_x2_v8i16:
> > +; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16>
> %B, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
> > +; CHECK-LABEL: st1_x2_v4i32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32>
> %B, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr)
> {
> > +; CHECK-LABEL: st1_x2_v4f32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x
> float> %B, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
> > +; CHECK-LABEL: st1_x2_v2i64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64>
> %B, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double*
> %addr) {
> > +; CHECK-LABEL: st1_x2_v2f64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x
> double> %B, double* %addr)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x
> i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>,
> <4 x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>,
> <2 x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x
> float>, <2 x float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>,
> <1 x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x
> double>, <1 x double>, double*) nounwind readonly
> > +
> > +define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8*
> %addr) {
> > +; CHECK-LABEL: st1_x3_v8i8:
> > +; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B,
> <8 x i8> %C, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C,
> i16* %addr) {
> > +; CHECK-LABEL: st1_x3_v4i16:
> > +; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16>
> %B, <4 x i16> %C, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C,
> i32* %addr) {
> > +; CHECK-LABEL: st1_x3_v2i32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32>
> %B, <2 x i32> %C, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float>
> %C, float* %addr) {
> > +; CHECK-LABEL: st1_x3_v2f32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x
> float> %B, <2 x float> %C, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C,
> i64* %addr) {
> > +; CHECK-LABEL: st1_x3_v1i64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64>
> %B, <1 x i64> %C, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x
> double> %C, double* %addr) {
> > +; CHECK-LABEL: st1_x3_v1f64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x
> double> %B, <1 x double> %C, double* %addr)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>,
> <16 x i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>,
> <8 x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>,
> <4 x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x
> float>, <4 x float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>,
> <2 x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x
> double>, <2 x double>, double*) nounwind readonly
> > +
> > +define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8*
> %addr) {
> > +; CHECK-LABEL: st1_x3_v16i8:
> > +; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, <16 x i8> %C, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C,
> i16* %addr) {
> > +; CHECK-LABEL: st1_x3_v8i16:
> > +; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16>
> %B, <8 x i16> %C, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C,
> i32* %addr) {
> > +; CHECK-LABEL: st1_x3_v4i32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32>
> %B, <4 x i32> %C, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float>
> %C, float* %addr) {
> > +; CHECK-LABEL: st1_x3_v4f32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x
> float> %B, <4 x float> %C, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C,
> i64* %addr) {
> > +; CHECK-LABEL: st1_x3_v2i64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64>
> %B, <2 x i64> %C, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x
> double> %C, double* %addr) {
> > +; CHECK-LABEL: st1_x3_v2f64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
> > +  call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x
> double> %B, <2 x double> %C, double* %addr)
> > +  ret void
> > +}
> > +
> > +
> > +declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x
> i8>, <8 x i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>,
> <4 x i16>, <4 x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>,
> <2 x i32>, <2 x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x
> float>, <2 x float>, <2 x float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>,
> <1 x i64>, <1 x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x
> double>, <1 x double>, <1 x double>, double*) nounwind readonly
> > +
> > +define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x
> i8> %D, i8* %addr) {
> > +; CHECK-LABEL: st1_x4_v8i8:
> > +; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B,
> <8 x i8> %C, <8 x i8> %D, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4
> x i16> %D, i16* %addr) {
> > +; CHECK-LABEL: st1_x4_v4i16:
> > +; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16>
> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2
> x i32> %D, i32* %addr) {
> > +; CHECK-LABEL: st1_x4_v2i32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32>
> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float>
> %C, <2 x float> %D, float* %addr) {
> > +; CHECK-LABEL: st1_x4_v2f32:
> > +; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x
> float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1
> x i64> %D, i64* %addr) {
> > +; CHECK-LABEL: st1_x4_v1i64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64>
> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x
> double> %C, <1 x double> %D, double* %addr) {
> > +; CHECK-LABEL: st1_x4_v1f64:
> > +; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x
> double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>,
> <16 x i8>, <16 x i8>, i8*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>,
> <8 x i16>, <8 x i16>, i16*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>,
> <4 x i32>, <4 x i32>, i32*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x
> float>, <4 x float>, <4 x float>, float*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>,
> <2 x i64>, <2 x i64>, i64*) nounwind readonly
> > +declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x
> double>, <2 x double>, <2 x double>, double*) nounwind readonly
> > +
> > +define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16
> x i8> %D, i8* %addr) {
> > +; CHECK-LABEL: st1_x4_v16i8:
> > +; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
> }, [x0]
> > +  call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8>
> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8
> x i16> %D, i16* %addr) {
> > +; CHECK-LABEL: st1_x4_v8i16:
> > +; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16>
> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4
> x i32> %D, i32* %addr) {
> > +; CHECK-LABEL: st1_x4_v4i32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32>
> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float>
> %C, <4 x float> %D, float* %addr) {
> > +; CHECK-LABEL: st1_x4_v4f32:
> > +; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x
> float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2
> x i64> %D, i64* %addr) {
> > +; CHECK-LABEL: st1_x4_v2i64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64>
> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
> > +  ret void
> > +}
> > +
> > +define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x
> double> %C, <2 x double> %D, double* %addr) {
> > +; CHECK-LABEL: st1_x4_v2f64:
> > +; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} },
> [x0]
> > +  call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x
> double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/stack-no-frame.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/stack-no-frame.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/stack-no-frame.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/stack-no-frame.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,20 @@
> > +; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
> > +
> > +@global = global [20 x i64] zeroinitializer, align 8
> > +
> > +; The following function has enough locals to need some restoring, but
> not a
> > +; frame record. In an intermediate frame refactoring, prologue and
> epilogue were
> > +; inconsistent about how much to move SP.
> > +define void @test_stack_no_frame() {
> > +; CHECK: test_stack_no_frame
> > +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
> > +  %local = alloca [20 x i64]
> > +  %val = load volatile [20 x i64]* @global, align 8
> > +  store volatile [20 x i64] %val, [20 x i64]* %local, align 8
> > +
> > +  %val2 = load volatile [20 x i64]* %local, align 8
> > +  store volatile [20 x i64] %val2, [20 x i64]* @global, align 8
> > +
> > +; CHECK: add sp, sp, #[[STACKSIZE]]
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/stackmap.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/stackmap.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/stackmap.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/stackmap.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,281 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +;
> > +; Note: Print verbose stackmaps using -debug-only=stackmaps.
> > +
> > +; We are not getting the correct stack alignment when cross compiling
> for arm64.
> > +; So specify a datalayout here.
> > +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> > +
> > +; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
> > +; CHECK-NEXT:  __LLVM_StackMaps:
> > +; CHECK-NEXT:   .long   0
> > +; Num Functions
> > +; CHECK-NEXT:   .long 11
> > +; CHECK-NEXT:   .long _constantargs
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _osrinline
> > +; CHECK-NEXT:   .long 32
> > +; CHECK-NEXT:   .long _osrcold
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _propertyRead
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _propertyWrite
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _jsVoidCall
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _jsIntCall
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _spilledValue
> > +; CHECK-NEXT:   .long 160
> > +; CHECK-NEXT:   .long _spilledStackMapValue
> > +; CHECK-NEXT:   .long 128
> > +; CHECK-NEXT:   .long _liveConstant
> > +; CHECK-NEXT:   .long 16
> > +; CHECK-NEXT:   .long _clobberLR
> > +; CHECK-NEXT:   .long 112
> > +; Num LargeConstants
> > +; CHECK-NEXT:   .long   2
> > +; CHECK-NEXT:   .quad   4294967295
> > +; CHECK-NEXT:   .quad   4294967296
> > +; Num Callsites
> > +; CHECK-NEXT:   .long   11
> > +
> > +; Constant arguments
> > +;
> > +; CHECK-NEXT:   .quad   1
> > +; CHECK-NEXT:   .long   L{{.*}}-_constantargs
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  4
> > +; SmallConstant
> > +; CHECK-NEXT:   .byte   4
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long   65535
> > +; SmallConstant
> > +; CHECK-NEXT:   .byte   4
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long   65536
> > +; SmallConstant
> > +; CHECK-NEXT:   .byte   5
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long   0
> > +; LargeConstant at index 0
> > +; CHECK-NEXT:   .byte   5
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long   1
> > +
> > +define void @constantargs() {
> > +entry:
> > +  %0 = inttoptr i64 244837814094590 to i8*
> > +  tail call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535,
> i64 65536, i64 4294967295, i64 4294967296)
> > +  ret void
> > +}
> > +
> > +; Inline OSR Exit
> > +;
> > +; CHECK-LABEL:  .long   L{{.*}}-_osrinline
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  2
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long  0
> > +define void @osrinline(i64 %a, i64 %b) {
> > +entry:
> > +  ; Runtime void->void call.
> > +  call void inttoptr (i64 244837814094590 to void ()*)()
> > +  ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live
> vars.
> > +  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12,
> i64 %a, i64 %b)
> > +  ret void
> > +}
> > +
> > +; Cold OSR Exit
> > +;
> > +; 2 live variables in register.
> > +;
> > +; CHECK-LABEL:  .long   L{{.*}}-_osrcold
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  2
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long  0
> > +define void @osrcold(i64 %a, i64 %b) {
> > +entry:
> > +  %test = icmp slt i64 %a, %b
> > +  br i1 %test, label %ret, label %cold
> > +cold:
> > +  ; OSR patchpoint with 12-byte nop-slide and 2 live vars.
> > +  %thunk = inttoptr i64 244837814094590 to i8*
> > +  call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 4, i32 20, i8* %thunk, i32 0, i64
> %a, i64 %b)
> > +  unreachable
> > +ret:
> > +  ret void
> > +}
> > +
> > +; Property Read
> > +; CHECK-LABEL:  .long   L{{.*}}-_propertyRead
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  0
> > +;
> > +; FIXME: There are currently no stackmap entries. After moving to
> > +; AnyRegCC, we will have entries for the object and return value.
> > +define i64 @propertyRead(i64* %obj) {
> > +entry:
> > +  %resolveRead = inttoptr i64 244837814094590 to i8*
> > +  %result = call i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveRead, i32 1,
> i64* %obj)
> > +  %add = add i64 %result, 3
> > +  ret i64 %add
> > +}
> > +
> > +; Property Write
> > +; CHECK-LABEL:  .long   L{{.*}}-_propertyWrite
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  2
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a)
> {
> > +entry:
> > +  %resolveWrite = inttoptr i64 244837814094590 to i8*
> > +  call anyregcc void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveWrite, i32 2,
> i64* %obj, i64 %a)
> > +  ret void
> > +}
> > +
> > +; Void JS Call
> > +;
> > +; 2 live variables in registers.
> > +;
> > +; CHECK-LABEL:  .long   L{{.*}}-_jsVoidCall
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  2
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64
> %l2) {
> > +entry:
> > +  %resolveCall = inttoptr i64 244837814094590 to i8*
> > +  call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 7, i32 20, i8* %resolveCall, i32 2,
> i64* %obj, i64 %arg, i64 %l1, i64 %l2)
> > +  ret void
> > +}
> > +
> > +; i64 JS Call
> > +;
> > +; 2 live variables in registers.
> > +;
> > +; CHECK-LABEL:  .long   L{{.*}}-_jsIntCall
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .short  2
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +; CHECK-NEXT:   .byte   1
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  {{[0-9]+}}
> > +; CHECK-NEXT:   .long   0
> > +define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64
> %l2) {
> > +entry:
> > +  %resolveCall = inttoptr i64 244837814094590 to i8*
> > +  %result = call i64 (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.i64(i64 8, i32 20, i8* %resolveCall, i32 2,
> i64* %obj, i64 %arg, i64 %l1, i64 %l2)
> > +  %add = add i64 %result, 3
> > +  ret i64 %add
> > +}
> > +
> > +; Spilled stack map values.
> > +;
> > +; Verify 28 stack map entries.
> > +;
> > +; CHECK-LABEL:  .long L{{.*}}-_spilledValue
> > +; CHECK-NEXT:   .short 0
> > +; CHECK-NEXT:   .short 28
> > +;
> > +; Check that at least one is a spilled entry from the frame pointer.
> > +; Location: Indirect FP + ...
> > +; CHECK:        .byte 3
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short 29
> > +define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3,
> i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6,
> i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64
> %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21,
> i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
> > +entry:
> > +  call void (i64, i32, i8*, i32, ...)*
> @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64
> %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64
> %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64
> %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17,
> i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64
> %l25, i64 %l26, i64 %l27)
> > +  ret void
> > +}
> > +
> > +; Spilled stack map values.
> > +;
> > +; Verify 30 stack map entries.
> > +;
> > +; CHECK-LABEL:  .long L{{.*}}-_spilledStackMapValue
> > +; CHECK-NEXT:   .short 0
> > +; CHECK-NEXT:   .short 30
> > +;
> > +; Check that at least one is a spilled entry from the frame pointer.
> > +; Location: Indirect FP + ...
> > +; CHECK:        .byte 3
> > +; CHECK-NEXT:   .byte 8
> > +; CHECK-NEXT:   .short 29
> > +define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64
> %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64
> %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17,
> i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64
> %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
> > +entry:
> > +  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32
> 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7,
> i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64
> %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22,
> i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
> > +  ret void
> > +}
> > +
> > +
> > +; Map a constant value.
> > +;
> > +; CHECK-LABEL:  .long L{{.*}}-_liveConstant
> > +; CHECK-NEXT:   .short 0
> > +; 1 location
> > +; CHECK-NEXT:   .short 1
> > +; Loc 0: SmallConstant
> > +; CHECK-NEXT:   .byte   4
> > +; CHECK-NEXT:   .byte   8
> > +; CHECK-NEXT:   .short  0
> > +; CHECK-NEXT:   .long   33
> > +
> > +define void @liveConstant() {
> > +  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15,
> i32 8, i32 33)
> > +  ret void
> > +}
> > +
> > +; Map a value when LR is the only free register.
> > +;
> > +; CHECK-LABEL:  .long L{{.*}}-_clobberLR
> > +; CHECK-NEXT:   .short 0
> > +; 1 location
> > +; CHECK-NEXT:   .short 1
> > +; Loc 0: Indirect FP (r29) - offset
> > +; CHECK-NEXT:   .byte   3
> > +; CHECK-NEXT:   .byte   4
> > +; CHECK-NEXT:   .short  29
> > +; CHECK-NEXT:   .long   -{{[0-9]+}}
> > +define void @clobberLR(i32 %a) {
> > +  tail call void asm sideeffect "nop",
> "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x31}"()
> nounwind
> > +  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16,
> i32 8, i32 %a)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.experimental.stackmap(i64, i32, ...)
> > +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
> > +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/stacksave.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/stacksave.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/stacksave.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/stacksave.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,20 @@
> > +; RUN: llc < %s -verify-coalescing | FileCheck %s
> > +; <rdar://problem/11522048>
> > +target triple = "arm64-apple-macosx10.8.0"
> > +
> > +; Verify that we can handle spilling the stack pointer without
> attempting
> > +; spilling it directly.
> > +; CHECK: f
> > +; CHECK: mov [[X0:x[0-9]+]], sp
> > +; CHECK: str [[X0]]
> > +; CHECK: inlineasm
> > +define void @f() nounwind ssp {
> > +entry:
> > +  %savedstack = call i8* @llvm.stacksave() nounwind
> > +  call void asm sideeffect "; inlineasm",
> "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"()
> nounwind
> > +  call void @llvm.stackrestore(i8* %savedstack) nounwind
> > +  ret void
> > +}
> > +
> > +declare i8* @llvm.stacksave() nounwind
> > +declare void @llvm.stackrestore(i8*) nounwind
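As background, a hypothetical C function that makes clang emit the llvm.stacksave/llvm.stackrestore
pair exercised above is one that uses a variable-length array in an inner scope (the function name
and shape here are illustrative only):

  /* The VLA below is typically bracketed by llvm.stacksave/llvm.stackrestore.
     Because the saved value is SP, the backend must copy it into a GPR
     ("mov xN, sp") before it can be spilled, which is what the test checks. */
  void use_vla(int n) {
    {
      volatile char buf[n];
      buf[0] = 0;
    }
  }
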
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/stp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/stp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/stp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/stp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,101 @@
> > +; RUN: llc < %s -march=arm64 -arm64-stp-suppress=false
> -verify-machineinstrs | FileCheck %s
> > +; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
> > +; RUN:   -verify-machineinstrs | FileCheck -check-prefix=STUR_CHK %s
> > +
> > +; CHECK: stp_int
> > +; CHECK: stp w0, w1, [x2]
> > +define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
> > +  store i32 %a, i32* %p, align 4
> > +  %add.ptr = getelementptr inbounds i32* %p, i64 1
> > +  store i32 %b, i32* %add.ptr, align 4
> > +  ret void
> > +}
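For reference, a hypothetical C equivalent of stp_int above (not part of the patch): two stores to
adjacent words, which the AArch64 load/store optimizer is expected to merge into the single
"stp w0, w1, [x2]" being checked:

  /* Adjacent 32-bit stores; the load/store optimizer should pair them into one stp. */
  void store_pair(int a, int b, int *p) {
    p[0] = a;
    p[1] = b;
  }
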
> > +
> > +; CHECK: stp_long
> > +; CHECK: stp x0, x1, [x2]
> > +define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
> > +  store i64 %a, i64* %p, align 8
> > +  %add.ptr = getelementptr inbounds i64* %p, i64 1
> > +  store i64 %b, i64* %add.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; CHECK: stp_float
> > +; CHECK: stp s0, s1, [x0]
> > +define void @stp_float(float %a, float %b, float* nocapture %p)
> nounwind {
> > +  store float %a, float* %p, align 4
> > +  %add.ptr = getelementptr inbounds float* %p, i64 1
> > +  store float %b, float* %add.ptr, align 4
> > +  ret void
> > +}
> > +
> > +; CHECK: stp_double
> > +; CHECK: stp d0, d1, [x0]
> > +define void @stp_double(double %a, double %b, double* nocapture %p)
> nounwind {
> > +  store double %a, double* %p, align 8
> > +  %add.ptr = getelementptr inbounds double* %p, i64 1
> > +  store double %b, double* %add.ptr, align 8
> > +  ret void
> > +}
> > +
> > +; Test the load/store optimizer---combine sturs into an stp, if
> appropriate
> > +define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
> > +; STUR_CHK: stur_int
> > +; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
> > +; STUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i32* %p, i32 -1
> > +  store i32 %a, i32* %p1, align 2
> > +  %p2 = getelementptr inbounds i32* %p, i32 -2
> > +  store i32 %b, i32* %p2, align 2
> > +  ret void
> > +}
> > +
> > +define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
> > +; STUR_CHK: stur_long
> > +; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
> > +; STUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds i64* %p, i32 -1
> > +  store i64 %a, i64* %p1, align 2
> > +  %p2 = getelementptr inbounds i64* %p, i32 -2
> > +  store i64 %b, i64* %p2, align 2
> > +  ret void
> > +}
> > +
> > +define void @stur_float(float %a, float %b, float* nocapture %p)
> nounwind {
> > +; STUR_CHK: stur_float
> > +; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
> > +; STUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds float* %p, i32 -1
> > +  store float %a, float* %p1, align 2
> > +  %p2 = getelementptr inbounds float* %p, i32 -2
> > +  store float %b, float* %p2, align 2
> > +  ret void
> > +}
> > +
> > +define void @stur_double(double %a, double %b, double* nocapture %p)
> nounwind {
> > +; STUR_CHK: stur_double
> > +; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
> > +; STUR_CHK-NEXT: ret
> > +  %p1 = getelementptr inbounds double* %p, i32 -1
> > +  store double %a, double* %p1, align 2
> > +  %p2 = getelementptr inbounds double* %p, i32 -2
> > +  store double %b, double* %p2, align 2
> > +  ret void
> > +}
> > +
> > +define void @splat_v4i32(i32 %v, i32 *%p) {
> > +entry:
> > +
> > +; CHECK-LABEL: splat_v4i32
> > +; CHECK-DAG: stp w0, w0, [x1]
> > +; CHECK-DAG: stp w0, w0, [x1, #8]
> > +; CHECK: ret
> > +
> > +  %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
> > +  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
> > +  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
> > +  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
> > +  %p21 = bitcast i32* %p to <4 x i32>*
> > +  store <4 x i32> %p20, <4 x i32>* %p21, align 4
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/strict-align.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/strict-align.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/strict-align.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/strict-align.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,25 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
> > +; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-strict-align |
> FileCheck %s --check-prefix=CHECK-STRICT
> > +
> > +define i32 @f0(i32* nocapture %p) nounwind {
> > +; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
> > +; CHECK-STRICT: ldrh [[LOW:w[0-9]+]], [x0]
> > +; CHECK-STRICT: orr w0, [[LOW]], [[HIGH]], lsl #16
> > +; CHECK-STRICT: ret
> > +
> > +; CHECK: ldr w0, [x0]
> > +; CHECK: ret
> > +  %tmp = load i32* %p, align 2
> > +  ret i32 %tmp
> > +}
> > +
> > +define i64 @f1(i64* nocapture %p) nounwind {
> > +; CHECK-STRICT:        ldp     w[[LOW:[0-9]+]], w[[HIGH:[0-9]+]], [x0]
> > +; CHECK-STRICT:        orr     x0, x[[LOW]], x[[HIGH]], lsl #32
> > +; CHECK-STRICT:        ret
> > +
> > +; CHECK: ldr x0, [x0]
> > +; CHECK: ret
> > +  %tmp = load i64* %p, align 4
> > +  ret i64 %tmp
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/stur.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/stur.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/stur.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/stur.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,98 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +%struct.X = type <{ i32, i64, i64 }>
> > +
> > +define void @foo1(i32* %p, i64 %val) nounwind {
> > +; CHECK-LABEL: foo1:
> > +; CHECK:       stur    w1, [x0, #-4]
> > +; CHECK-NEXT:  ret
> > +  %tmp1 = trunc i64 %val to i32
> > +  %ptr = getelementptr inbounds i32* %p, i64 -1
> > +  store i32 %tmp1, i32* %ptr, align 4
> > +  ret void
> > +}
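A hypothetical source-level analogue of foo1 (illustrative only, not part of the patch): storing
one element below the incoming pointer needs a negative offset, which is only available in the
unscaled "stur" form:

  /* p[-1] is at byte offset -4, so this should select "stur w1, [x0, #-4]". */
  void store_before(int *p, int v) {
    p[-1] = v;
  }
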
> > +define void @foo2(i16* %p, i64 %val) nounwind {
> > +; CHECK-LABEL: foo2:
> > +; CHECK:       sturh   w1, [x0, #-2]
> > +; CHECK-NEXT:  ret
> > +  %tmp1 = trunc i64 %val to i16
> > +  %ptr = getelementptr inbounds i16* %p, i64 -1
> > +  store i16 %tmp1, i16* %ptr, align 2
> > +  ret void
> > +}
> > +define void @foo3(i8* %p, i64 %val) nounwind {
> > +; CHECK-LABEL: foo3:
> > +; CHECK:       sturb   w1, [x0, #-1]
> > +; CHECK-NEXT:  ret
> > +  %tmp1 = trunc i64 %val to i8
> > +  %ptr = getelementptr inbounds i8* %p, i64 -1
> > +  store i8 %tmp1, i8* %ptr, align 1
> > +  ret void
> > +}
> > +define void @foo4(i16* %p, i32 %val) nounwind {
> > +; CHECK-LABEL: foo4:
> > +; CHECK:       sturh   w1, [x0, #-2]
> > +; CHECK-NEXT:  ret
> > +  %tmp1 = trunc i32 %val to i16
> > +  %ptr = getelementptr inbounds i16* %p, i32 -1
> > +  store i16 %tmp1, i16* %ptr, align 2
> > +  ret void
> > +}
> > +define void @foo5(i8* %p, i32 %val) nounwind {
> > +; CHECK-LABEL: foo5:
> > +; CHECK:       sturb   w1, [x0, #-1]
> > +; CHECK-NEXT:  ret
> > +  %tmp1 = trunc i32 %val to i8
> > +  %ptr = getelementptr inbounds i8* %p, i32 -1
> > +  store i8 %tmp1, i8* %ptr, align 1
> > +  ret void
> > +}
> > +
> > +define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
> > +; CHECK-LABEL: foo:
> > +; CHECK-NOT: str
> > +; CHECK: stur    xzr, [x0, #12]
> > +; CHECK-NEXT: stur    xzr, [x0, #4]
> > +; CHECK-NEXT: ret
> > +  %B = getelementptr inbounds %struct.X* %p, i64 0, i32 1
> > +  %val = bitcast i64* %B to i8*
> > +  call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1
> false)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
> nounwind
> > +
> > +; Unaligned 16b stores are split into 8b stores for performance.
> > +; radar://15424193
> > +
> > +; CHECK-LABEL: unaligned:
> > +; CHECK-NOT: str q0
> > +; CHECK: str     d[[REG:[0-9]+]], [x0]
> > +; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
> > +; CHECK: str     d[[REG2]], [x0, #8]
> > +define void @unaligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
> > +  store <4 x i32> %v, <4 x i32>* %p, align 4
> > +  ret void
> > +}
> > +
> > +; CHECK-LABEL: aligned:
> > +; CHECK: str q0
> > +define void @aligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
> > +  store <4 x i32> %v, <4 x i32>* %p
> > +  ret void
> > +}
> > +
> > +; Don't split one and two byte aligned stores.
> > +; radar://16349308
> > +
> > +; CHECK-LABEL: twobytealign:
> > +; CHECK: str q0
> > +define void @twobytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
> > +  store <4 x i32> %v, <4 x i32>* %p, align 2
> > +  ret void
> > +}
> > +; CHECK-LABEL: onebytealign:
> > +; CHECK: str q0
> > +define void @onebytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
> > +  store <4 x i32> %v, <4 x i32>* %p, align 1
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/subvector-extend.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/subvector-extend.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/subvector-extend.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/subvector-extend.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,141 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -asm-verbose=false | FileCheck %s
> > +
> > +; Test efficient codegen of vector extends up from legal type to 128 bit
> > +; and 256 bit vector types.
> > +
> > +;-----
> > +; Vectors of i16.
> > +;-----
> > +define <8 x i16> @func1(<8 x i8> %v0) nounwind {
> > +; CHECK-LABEL: func1:
> > +; CHECK-NEXT: ushll.8h  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <8 x i8> %v0 to <8 x i16>
> > +  ret <8 x i16> %r
> > +}
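At the source level the same widening is exposed as the ACLE vmovl intrinsics; a hypothetical
sketch (assuming arm_neon.h on an arm64 target) that corresponds to func1 above:

  #include <arm_neon.h>

  /* Zero-extend 8 x u8 to 8 x u16; expected to select a zero-extending
     shift-left-long, i.e. "ushll.8h v0, v0, #0" (also known by its uxtl alias). */
  uint16x8_t widen_u8(uint8x8_t v) {
    return vmovl_u8(v);
  }
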
> > +
> > +define <8 x i16> @func2(<8 x i8> %v0) nounwind {
> > +; CHECK-LABEL: func2:
> > +; CHECK-NEXT: sshll.8h  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <8 x i8> %v0 to <8 x i16>
> > +  ret <8 x i16> %r
> > +}
> > +
> > +define <16 x i16> @func3(<16 x i8> %v0) nounwind {
> > +; CHECK-LABEL: func3:
> > +; CHECK-NEXT: ushll2.8h  v1, v0, #0
> > +; CHECK-NEXT: ushll.8h  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <16 x i8> %v0 to <16 x i16>
> > +  ret <16 x i16> %r
> > +}
> > +
> > +define <16 x i16> @func4(<16 x i8> %v0) nounwind {
> > +; CHECK-LABEL: func4:
> > +; CHECK-NEXT: sshll2.8h  v1, v0, #0
> > +; CHECK-NEXT: sshll.8h  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <16 x i8> %v0 to <16 x i16>
> > +  ret <16 x i16> %r
> > +}
> > +
> > +;-----
> > +; Vectors of i32.
> > +;-----
> > +
> > +define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
> > +; CHECK-LABEL: afunc1:
> > +; CHECK-NEXT: ushll.4s v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <4 x i16> %v0 to <4 x i32>
> > +  ret <4 x i32> %r
> > +}
> > +
> > +define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
> > +; CHECK-LABEL: afunc2:
> > +; CHECK-NEXT: sshll.4s v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <4 x i16> %v0 to <4 x i32>
> > +  ret <4 x i32> %r
> > +}
> > +
> > +define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
> > +; CHECK-LABEL: afunc3:
> > +; CHECK-NEXT: ushll2.4s v1, v0, #0
> > +; CHECK-NEXT: ushll.4s v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <8 x i16> %v0 to <8 x i32>
> > +  ret <8 x i32> %r
> > +}
> > +
> > +define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
> > +; CHECK-LABEL: afunc4:
> > +; CHECK-NEXT: sshll2.4s v1, v0, #0
> > +; CHECK-NEXT: sshll.4s v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <8 x i16> %v0 to <8 x i32>
> > +  ret <8 x i32> %r
> > +}
> > +
> > +define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
> > +; CHECK-LABEL: bfunc1:
> > +; CHECK-NEXT: ushll.8h  v0, v0, #0
> > +; CHECK-NEXT: ushll2.4s v1, v0, #0
> > +; CHECK-NEXT: ushll.4s  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <8 x i8> %v0 to <8 x i32>
> > +  ret <8 x i32> %r
> > +}
> > +
> > +define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
> > +; CHECK-LABEL: bfunc2:
> > +; CHECK-NEXT: sshll.8h  v0, v0, #0
> > +; CHECK-NEXT: sshll2.4s v1, v0, #0
> > +; CHECK-NEXT: sshll.4s  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <8 x i8> %v0 to <8 x i32>
> > +  ret <8 x i32> %r
> > +}
> > +
> > +;-----
> > +; Vectors of i64.
> > +;-----
> > +
> > +define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
> > +; CHECK-LABEL: zfunc1:
> > +; CHECK-NEXT: ushll2.2d v1, v0, #0
> > +; CHECK-NEXT: ushll.2d v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <4 x i32> %v0 to <4 x i64>
> > +  ret <4 x i64> %r
> > +}
> > +
> > +define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
> > +; CHECK-LABEL: zfunc2:
> > +; CHECK-NEXT: sshll2.2d v1, v0, #0
> > +; CHECK-NEXT: sshll.2d v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <4 x i32> %v0 to <4 x i64>
> > +  ret <4 x i64> %r
> > +}
> > +
> > +define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
> > +; CHECK-LABEL: bfunc3:
> > +; CHECK-NEXT: ushll.4s  v0, v0, #0
> > +; CHECK-NEXT: ushll2.2d v1, v0, #0
> > +; CHECK-NEXT: ushll.2d  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = zext <4 x i16> %v0 to <4 x i64>
> > +  ret <4 x i64> %r
> > +}
> > +
> > +define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
> > +; CHECK-LABEL: cfunc4:
> > +; CHECK-NEXT: sshll.4s  v0, v0, #0
> > +; CHECK-NEXT: sshll2.2d v1, v0, #0
> > +; CHECK-NEXT: sshll.2d  v0, v0, #0
> > +; CHECK-NEXT: ret
> > +  %r = sext <4 x i16> %v0 to <4 x i64>
> > +  ret <4 x i64> %r
> > +}
> >
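
For readers less used to the AArch64 idiom in subvector-extend.ll: a vector
zext/sext is lowered as a shift-left-long by zero (ushll/sshll #0), with the
...ll2 forms covering the high half when the source is already 128 bits wide.
A hypothetical C analogue using ACLE intrinsics (my illustration, not part of
the patch):

    #include <arm_neon.h>

    /* Same widening as @func1/@func2: each 8-bit lane becomes 16 bits,
     * which the backend emits as ushll.8h / sshll.8h with a #0 shift. */
    uint16x8_t widen_u8(uint8x8_t v) { return vmovl_u8(v); }
    int16x8_t  widen_s8(int8x8_t  v) { return vmovl_s8(v); }
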
> > Added: llvm/trunk/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,36 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
> > +; rdar://13214163 - Make sure we generate a correct lookup table for
> the TBL
> > +; instruction when the element size of the vector is not 8 bits. We were
> > +; getting both the endianness wrong and the element indexing wrong.
> > +define <8 x i16> @foo(<8 x i16> %a) nounwind readnone {
> > +; CHECK:       .section        __TEXT,__literal16,16byte_literals
> > +; CHECK:       .align  4
> > +; CHECK:lCPI0_0:
> > +; CHECK:       .byte   0                       ; 0x0
> > +; CHECK:       .byte   1                       ; 0x1
> > +; CHECK:       .byte   0                       ; 0x0
> > +; CHECK:       .byte   1                       ; 0x1
> > +; CHECK:       .byte   0                       ; 0x0
> > +; CHECK:       .byte   1                       ; 0x1
> > +; CHECK:       .byte   0                       ; 0x0
> > +; CHECK:       .byte   1                       ; 0x1
> > +; CHECK:       .byte   8                       ; 0x8
> > +; CHECK:       .byte   9                       ; 0x9
> > +; CHECK:       .byte   8                       ; 0x8
> > +; CHECK:       .byte   9                       ; 0x9
> > +; CHECK:       .byte   8                       ; 0x8
> > +; CHECK:       .byte   9                       ; 0x9
> > +; CHECK:       .byte   8                       ; 0x8
> > +; CHECK:       .byte   9                       ; 0x9
> > +; CHECK:       .section __TEXT,__text,regular,pure_instructions
> > +; CHECK:       .globl  _foo
> > +; CHECK:       .align  2
> > +; CHECK:_foo:                                   ; @foo
> > +; CHECK:       adrp    [[BASE:x[0-9]+]], lCPI0_0@PAGE
> > +; CHECK:       ldr     q[[REG:[0-9]+]], {{\[}}[[BASE]], lCPI0_0@PAGEOFF
> ]
> > +; CHECK:       tbl.16b v0, { v0 }, v[[REG]]
> > +; CHECK:       ret
> > +
> > +  %val = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
> > +  ret <8 x i16> %val
> > +}
> >
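
To see why the constant pool checked in swizzle-tbl-i16-layout.ll is correct:
TBL indexes bytes, so on a little-endian target each 16-bit element index e in
the shuffle mask expands to the byte indices 2*e and 2*e+1. The following
standalone C program (an illustration of mine, not from the patch) reproduces
the expected table for the mask <0,0,0,0,4,4,4,4>:

    #include <stdio.h>

    int main(void) {
        const int elem_idx[8] = {0, 0, 0, 0, 4, 4, 4, 4};
        /* Prints 0 1 0 1 0 1 0 1 8 9 8 9 8 9 8 9, matching the
         * .byte directives the test checks for. */
        for (int i = 0; i < 8; ++i)
            printf("%d %d ", 2 * elem_idx[i], 2 * elem_idx[i] + 1);
        putchar('\n');
        return 0;
    }
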
> > Added: llvm/trunk/test/CodeGen/ARM64/tbl.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/tbl.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/tbl.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/tbl.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,132 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
> > +; CHECK: tbl1_8b
> > +; CHECK: tbl.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8> %A, <8 x
> i8> %B)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
> > +; CHECK: tbl1_16b
> > +; CHECK: tbl.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8> %A, <16
> x i8> %B)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
> > +; CHECK: tbl2_8b
> > +; CHECK: tbl.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8> %A, <16 x
> i8> %B, <8 x i8> %C)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
> > +; CHECK: tbl2_16b
> > +; CHECK: tbl.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x
> i8> %D) {
> > +; CHECK: tbl3_8b
> > +; CHECK: tbl.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <8 x i8> %D)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C,
> <16 x i8> %D) {
> > +; CHECK: tbl3_16b
> > +; CHECK: tbl.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C, <16 x i8> %D)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16
> x i8> %D, <8 x i8> %E) {
> > +; CHECK: tbl4_8b
> > +; CHECK: tbl.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C,
> <16 x i8> %D, <16 x i8> %E) {
> > +; CHECK: tbl4_16b
> > +; CHECK: tbl.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>)
> nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)
> nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x
> i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>) nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x
> i8>, <8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>) nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x
> i8>, <16 x i8>, <8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>, <16 x i8>) nounwind readnone
> > +
> > +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
> nounwind {
> > +; CHECK: tbx1_8b
> > +; CHECK: tbx.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8> %A, <16 x
> i8> %B, <8 x i8> %C)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
> nounwind {
> > +; CHECK: tbx1_16b
> > +; CHECK: tbx.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x
> i8> %D) {
> > +; CHECK: tbx2_8b
> > +; CHECK: tbx.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <8 x i8> %D)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C,
> <16 x i8> %D) {
> > +; CHECK: tbx2_16b
> > +; CHECK: tbx.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C, <16 x i8> %D)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x
> i8> %D, <8 x i8> %E) {
> > +; CHECK: tbx3_8b
> > +; CHECK: tbx.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C,
> <16 x i8> %D, <16 x i8> %E) {
> > +; CHECK: tbx3_16b
> > +; CHECK: tbx.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x
> i8> %D, <16 x i8> %E, <8 x i8> %F) {
> > +; CHECK: tbx4_8b
> > +; CHECK: tbx.8b
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8> %A, <16 x
> i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
> > +  ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C,
> <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
> > +; CHECK: tbx4_16b
> > +; CHECK: tbx.16b
> > +  %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8> %A, <16
> x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
> > +  ret <16 x i8> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x
> i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>) nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x
> i8>, <8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>) nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x
> i8>, <16 x i8>, <8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>, <16 x i8>) nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x
> i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16
> x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/this-return.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/this-return.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/this-return.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/this-return.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,83 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +%struct.A = type { i8 }
> > +%struct.B = type { i32 }
> > +%struct.C = type { %struct.B }
> > +%struct.D = type { %struct.B }
> > +%struct.E = type { %struct.B, %struct.B }
> > +
> > +declare %struct.A* @A_ctor_base(%struct.A* returned)
> > +declare %struct.B* @B_ctor_base(%struct.B* returned, i32)
> > +declare %struct.B* @B_ctor_complete(%struct.B* returned, i32)
> > +
> > +declare %struct.A* @A_ctor_base_nothisret(%struct.A*)
> > +declare %struct.B* @B_ctor_base_nothisret(%struct.B*, i32)
> > +declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32)
> > +
> > +define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: C_ctor_base:
> > +; CHECK-NOT: mov {{x[0-9]+}}, x0
> > +; CHECK: bl {{_?A_ctor_base}}
> > +; CHECK-NOT: mov x0, {{x[0-9]+}}
> > +; CHECK: b {{_?B_ctor_base}}
> > +  %0 = bitcast %struct.C* %this to %struct.A*
> > +  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
> > +  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
> > +  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
> > +  ret %struct.C* %this
> > +}
> > +
> > +define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: C_ctor_base_nothisret:
> > +; CHECK: mov [[SAVETHIS:x[0-9]+]], x0
> > +; CHECK: bl {{_?A_ctor_base_nothisret}}
> > +; CHECK: mov x0, [[SAVETHIS]]
> > +; CHECK-NOT: b {{_?B_ctor_base_nothisret}}
> > +  %0 = bitcast %struct.C* %this to %struct.A*
> > +  %call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0)
> > +  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
> > +  %call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1,
> i32 %x)
> > +  ret %struct.C* %this
> > +}
> > +
> > +define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: C_ctor_complete:
> > +; CHECK: b {{_?C_ctor_base}}
> > +  %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
> > +  ret %struct.C* %this
> > +}
> > +
> > +define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: C_ctor_complete_nothisret:
> > +; CHECK-NOT: b {{_?C_ctor_base_nothisret}}
> > +  %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this,
> i32 %x)
> > +  ret %struct.C* %this
> > +}
> > +
> > +define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: D_ctor_base:
> > +; CHECK-NOT: mov {{x[0-9]+}}, x0
> > +; CHECK: bl {{_?B_ctor_complete}}
> > +; CHECK-NOT: mov x0, {{x[0-9]+}}
> > +; CHECK: b {{_?B_ctor_complete}}
> > +  %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0
> > +  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
> > +  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
> > +  ret %struct.D* %this
> > +}
> > +
> > +define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) {
> > +entry:
> > +; CHECK-LABEL: E_ctor_base:
> > +; CHECK-NOT: b {{_?B_ctor_complete}}
> > +  %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
> > +  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
> > +  %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1
> > +  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
> > +  ret %struct.E* %this
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/tls-darwin.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/tls-darwin.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/tls-darwin.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/tls-darwin.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,18 @@
> > +; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
> > +
> > +@var = thread_local global i8 0
> > +
> > +; N.b. x0 must be the result of the first load (i.e. the address of the
> > +; descriptor) when tlv_get_addr is called. Likewise the result is
> returned in
> > +; x0.
> > +define i8 @get_var() {
> > +; CHECK-LABEL: get_var:
> > +; CHECK: adrp x[[TLVPDESC_SLOT_HI:[0-9]+]], _var@TLVPPAGE
> > +; CHECK: ldr x0, [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
> > +; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x0]
> > +; CHECK: blr [[TLV_GET_ADDR]]
> > +; CHECK: ldrb w0, [x0]
> > +
> > +  %val = load i8* @var, align 1
> > +  ret i8 %val
> > +}
> >
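
For context on the Darwin TLS sequence above: the checks follow the TLV
calling convention literally (descriptor address in x0, load the resolver
from the descriptor's first word, blr it, variable address back in x0). The
C source this models is essentially just a plain __thread access; my
reconstruction, not taken from the patch:

    __thread char var;

    char get_var(void) { return var; }
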
> > Added: llvm/trunk/test/CodeGen/ARM64/tls-dynamic-together.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/tls-dynamic-together.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/tls-dynamic-together.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/tls-dynamic-together.ll Sat Mar 29
> 05:18:08 2014
> > @@ -0,0 +1,18 @@
> > +; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic
> -verify-machineinstrs < %s | FileCheck %s
> > +
> > +; If the .tlsdesccall and blr parts are emitted completely separately
> (even with
> > +; glue) then LLVM will separate them quite happily (with a spill at O0,
> hence
> > +; the option). This is definitely wrong, so we make sure they are
> emitted
> > +; together.
> > +
> > +@general_dynamic_var = external thread_local global i32
> > +
> > +define i32 @test_generaldynamic() {
> > +; CHECK-LABEL: test_generaldynamic:
> > +
> > +  %val = load i32* @general_dynamic_var
> > +  ret i32 %val
> > +
> > +; CHECK: .tlsdesccall general_dynamic_var
> > +; CHECK-NEXT: blr {{x[0-9]+}}
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/tls-dynamics.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/tls-dynamics.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/tls-dynamics.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/tls-dynamics.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,135 @@
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic
> -verify-machineinstrs < %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic
> -filetype=obj < %s | llvm-objdump -r - | FileCheck
> --check-prefix=CHECK-RELOC %s
> > +
> > +@general_dynamic_var = external thread_local global i32
> > +
> > +define i32 @test_generaldynamic() {
> > +; CHECK-LABEL: test_generaldynamic:
> > +
> > +  %val = load i32* @general_dynamic_var
> > +  ret i32 %val
> > +
> > +  ; FIXME: the adrp instructions are redundant (if harmless).
> > +; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
> > +; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
> > +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]],
> :tlsdesc_lo12:general_dynamic_var]
> > +; CHECK: .tlsdesccall general_dynamic_var
> > +; CHECK-NEXT: blr [[CALLEE]]
> > +
> > +; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
> > +; CHECK: ldr w0, [x[[TP]], x0]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
> > +
> > +}
> > +
> > +define i32* @test_generaldynamic_addr() {
> > +; CHECK-LABEL: test_generaldynamic_addr:
> > +
> > +  ret i32* @general_dynamic_var
> > +
> > +  ; FIXME: the adrp instructions are redundant (if harmless).
> > +; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
> > +; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
> > +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]],
> :tlsdesc_lo12:general_dynamic_var]
> > +; CHECK: .tlsdesccall general_dynamic_var
> > +; CHECK-NEXT: blr [[CALLEE]]
> > +
> > +; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
> > +; CHECK: add x0, [[TP]], x0
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
> > +}
> > +
> > +@local_dynamic_var = external thread_local(localdynamic) global i32
> > +
> > +define i32 @test_localdynamic() {
> > +; CHECK-LABEL: test_localdynamic:
> > +
> > +  %val = load i32* @local_dynamic_var
> > +  ret i32 %val
> > +
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]],
> :tlsdesc_lo12:_TLS_MODULE_BASE_]
> > +; CHECK: .tlsdesccall _TLS_MODULE_BASE_
> > +; CHECK-NEXT: blr [[CALLEE]]
> > +
> > +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
> > +; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
> > +
> > +; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]]
> > +
> > +; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
> > +
> > +; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
> > +
> > +}
> > +
> > +define i32* @test_localdynamic_addr() {
> > +; CHECK-LABEL: test_localdynamic_addr:
> > +
> > +  ret i32* @local_dynamic_var
> > +
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]],
> :tlsdesc_lo12:_TLS_MODULE_BASE_]
> > +; CHECK: .tlsdesccall _TLS_MODULE_BASE_
> > +; CHECK-NEXT: blr [[CALLEE]]
> > +
> > +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
> > +; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
> > +
> > +; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]]
> > +
> > +; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0
> > +
> > +; CHECK: add x0, [[TPIDR]], [[TPREL]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
> > +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
> > +
> > +}
> > +
> > +; The entire point of the local-dynamic access model is to have a
> single call to
> > +; the expensive resolver. Make sure we achieve that goal.
> > +
> > +@local_dynamic_var2 = external thread_local(localdynamic) global i32
> > +
> > +define i32 @test_localdynamic_deduplicate() {
> > +; CHECK-LABEL: test_localdynamic_deduplicate:
> > +
> > +  %val = load i32* @local_dynamic_var
> > +  %val2 = load i32* @local_dynamic_var2
> > +
> > +  %sum = add i32 %val, %val2
> > +  ret i32 %sum
> > +
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
> > +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
> > +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]],
> :tlsdesc_lo12:_TLS_MODULE_BASE_]
> > +; CHECK: .tlsdesccall _TLS_MODULE_BASE_
> > +; CHECK-NEXT: blr [[CALLEE]]
> > +
> > +; CHECK-NOT: _TLS_MODULE_BASE_
> > +
> > +; CHECK: ret
> > +}
> >
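
To make @test_localdynamic_deduplicate concrete, here is roughly the C it
models (an illustration of mine, not from the patch): two local-dynamic TLS
variables read in one function should cost a single resolver call, which is
what the lone .tlsdesccall/blr pair plus the CHECK-NOT above pin down.

    /* The tls_model attribute mirrors thread_local(localdynamic) in the IR. */
    extern __thread int local_dynamic_var
        __attribute__((tls_model("local-dynamic")));
    extern __thread int local_dynamic_var2
        __attribute__((tls_model("local-dynamic")));

    int test_localdynamic_deduplicate(void) {
        /* Both accesses share one _TLS_MODULE_BASE_ resolution. */
        return local_dynamic_var + local_dynamic_var2;
    }
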
> > Added: llvm/trunk/test/CodeGen/ARM64/tls-execs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/tls-execs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/tls-execs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/tls-execs.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,63 @@
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs
> -show-mc-encoding < %s | FileCheck %s
> > +; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s |
> llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
> > +
> > +@initial_exec_var = external thread_local(initialexec) global i32
> > +
> > +define i32 @test_initial_exec() {
> > +; CHECK-LABEL: test_initial_exec:
> > +  %val = load i32* @initial_exec_var
> > +
> > +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
> > +; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]],
> :gottprel_lo12:initial_exec_var]
> > +; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
> > +; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
> > +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
> > +
> > +  ret i32 %val
> > +}
> > +
> > +define i32* @test_initial_exec_addr() {
> > +; CHECK-LABEL: test_initial_exec_addr:
> > +  ret i32* @initial_exec_var
> > +
> > +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
> > +; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]],
> :gottprel_lo12:initial_exec_var]
> > +; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
> > +; CHECK: add x0, [[TP]], [[TP_OFFSET]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
> > +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
> > +
> > +}
> > +
> > +@local_exec_var = thread_local(localexec) global i32 0
> > +
> > +define i32 @test_local_exec() {
> > +; CHECK-LABEL: test_local_exec:
> > +  %val = load i32* @local_exec_var
> > +
> > +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var //
> encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
> > +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
> > +; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
> > +; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
> > +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
> > +
> > +  ret i32 %val
> > +}
> > +
> > +define i32* @test_local_exec_addr() {
> > +; CHECK-LABEL: test_local_exec_addr:
> > +  ret i32* @local_exec_var
> > +
> > +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
> > +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
> > +; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
> > +; CHECK: add x0, [[TP]], [[TP_OFFSET]]
> > +
> > +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
> > +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/trap.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/trap.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/trap.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/trap.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,8 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +define void @foo() nounwind {
> > +; CHECK: foo
> > +; CHECK: brk #1
> > +  tail call void @llvm.trap()
> > +  ret void
> > +}
> > +declare void @llvm.trap() nounwind
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/trn.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/trn.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/trn.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/trn.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,134 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vtrni8:
> > +;CHECK: trn1.8b
> > +;CHECK: trn2.8b
> > +;CHECK-NEXT: add.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
> > +       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +       ret <8 x i8> %tmp5
> > +}
> > +
> > +define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: vtrni16:
> > +;CHECK: trn1.4h
> > +;CHECK: trn2.4h
> > +;CHECK-NEXT: add.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = load <4 x i16>* %B
> > +       %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x
> i32> <i32 0, i32 4, i32 2, i32 6>
> > +       %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x
> i32> <i32 1, i32 5, i32 3, i32 7>
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +       ret <4 x i16> %tmp5
> > +}
> > +
> > +; 2xi32 TRN is redundant with ZIP
> > +define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: vtrni32:
> > +;CHECK: zip1.2s
> > +;CHECK: zip2.2s
> > +;CHECK-NEXT: add.2s
> > +       %tmp1 = load <2 x i32>* %A
> > +       %tmp2 = load <2 x i32>* %B
> > +       %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x
> i32> <i32 0, i32 2>
> > +       %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x
> i32> <i32 1, i32 3>
> > +        %tmp5 = add <2 x i32> %tmp3, %tmp4
> > +       ret <2 x i32> %tmp5
> > +}
> > +
> > +define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
> > +;CHECK-LABEL: vtrnf:
> > +;CHECK: zip1.2s
> > +;CHECK: zip2.2s
> > +;CHECK-NEXT: fadd.2s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = load <2 x float>* %B
> > +       %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x
> i32> <i32 0, i32 2>
> > +       %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x
> i32> <i32 1, i32 3>
> > +        %tmp5 = fadd <2 x float> %tmp3, %tmp4
> > +       ret <2 x float> %tmp5
> > +}
> > +
> > +define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vtrnQi8:
> > +;CHECK: trn1.16b
> > +;CHECK: trn2.16b
> > +;CHECK-NEXT: add.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = load <16 x i8>* %B
> > +       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8,
> i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
> > +       %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9,
> i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
> > +        %tmp5 = add <16 x i8> %tmp3, %tmp4
> > +       ret <16 x i8> %tmp5
> > +}
> > +
> > +define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: vtrnQi16:
> > +;CHECK: trn1.8h
> > +;CHECK: trn2.8h
> > +;CHECK-NEXT: add.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = load <8 x i16>* %B
> > +       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
> > +       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +       ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: vtrnQi32:
> > +;CHECK: trn1.4s
> > +;CHECK: trn2.4s
> > +;CHECK-NEXT: add.4s
> > +       %tmp1 = load <4 x i32>* %A
> > +       %tmp2 = load <4 x i32>* %B
> > +       %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x
> i32> <i32 0, i32 4, i32 2, i32 6>
> > +       %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x
> i32> <i32 1, i32 5, i32 3, i32 7>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +       ret <4 x i32> %tmp5
> > +}
> > +
> > +define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
> > +;CHECK-LABEL: vtrnQf:
> > +;CHECK: trn1.4s
> > +;CHECK: trn2.4s
> > +;CHECK-NEXT: fadd.4s
> > +       %tmp1 = load <4 x float>* %A
> > +       %tmp2 = load <4 x float>* %B
> > +       %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x
> i32> <i32 0, i32 4, i32 2, i32 6>
> > +       %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x
> i32> <i32 1, i32 5, i32 3, i32 7>
> > +        %tmp5 = fadd <4 x float> %tmp3, %tmp4
> > +       ret <4 x float> %tmp5
> > +}
> > +
> > +; Undef shuffle indices should not prevent matching to VTRN:
> > +
> > +define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vtrni8_undef:
> > +;CHECK: trn1.8b
> > +;CHECK: trn2.8b
> > +;CHECK-NEXT: add.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
> > +       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +       ret <8 x i8> %tmp5
> > +}
> > +
> > +define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind
> {
> > +;CHECK-LABEL: vtrnQi16_undef:
> > +;CHECK: trn1.8h
> > +;CHECK: trn2.8h
> > +;CHECK-NEXT: add.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = load <8 x i16>* %B
> > +       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
> > +       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +       ret <8 x i16> %tmp5
> > +}
> >
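
On the undef cases at the end of trn.ll: the shuffle masks follow the
trn1/trn2 pattern except where an element is undef, so the mask matcher has
to treat an unspecified index as a wildcard. A standalone sketch of that kind
of check (my reading of what the ISel matcher needs to do, not the patch's
actual code):

    #include <stdbool.h>

    /* mask[i] is the shufflevector index into the concatenation of the two
     * num_elts-wide operands, or -1 for undef.  which == 0 tests for trn1,
     * which == 1 for trn2. */
    static bool is_trn_mask(const int *mask, int num_elts, int which) {
        for (int i = 0; i < num_elts; ++i) {
            if (mask[i] < 0)
                continue;                       /* undef matches anything */
            int expected = (i & ~1) + which + (i & 1) * num_elts;
            if (mask[i] != expected)
                return false;
        }
        return true;
    }

For two-element vectors the trn and zip patterns coincide, which is why the
2s cases above are checked as zip1/zip2 rather than trn1/trn2.
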
> > Added: llvm/trunk/test/CodeGen/ARM64/trunc-store.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/trunc-store.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/trunc-store.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/trunc-store.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,75 @@
> > +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
> > +
> > +define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
> > +; CHECK-LABEL: bar:
> > +; CHECK: xtn.8b v[[REG:[0-9]+]], v0
> > +; CHECK-NEXT: str d[[REG]], [x0]
> > +; CHECK-NEXT: ret
> > +  %tmp = trunc <8 x i16> %arg to <8 x i8>
> > +  store <8 x i8> %tmp, <8 x i8>* %p, align 8
> > +  ret void
> > +}
> > +
> > +@zptr8 = common global i8* null, align 8
> > +@zptr16 = common global i16* null, align 8
> > +@zptr32 = common global i32* null, align 8
> > +
> > +define void @fct32(i32 %arg, i64 %var) {
> > +; CHECK: fct32
> > +; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr32@GOTPAGE
> > +; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]],
> _zptr32@GOTPAGEOFF]
> > +; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> > +; w0 is %arg
> > +; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
> > +; w1 is %var truncated
> > +; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], x[[OFFSETREGNUM]], sxtw #2]
> > +; CHECK-NEXT: ret
> > +bb:
> > +  %.pre37 = load i32** @zptr32, align 8
> > +  %dec = add nsw i32 %arg, -1
> > +  %idxprom8 = sext i32 %dec to i64
> > +  %arrayidx9 = getelementptr inbounds i32* %.pre37, i64 %idxprom8
> > +  %tmp = trunc i64 %var to i32
> > +  store i32 %tmp, i32* %arrayidx9, align 4
> > +  ret void
> > +}
> > +
> > +define void @fct16(i32 %arg, i64 %var) {
> > +; CHECK: fct16
> > +; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr16@GOTPAGE
> > +; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]],
> _zptr16@GOTPAGEOFF]
> > +; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> > +; w0 is %arg
> > +; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
> > +; w1 is %var truncated
> > +; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], x[[OFFSETREGNUM]], sxtw #1]
> > +; CHECK-NEXT: ret
> > +bb:
> > +  %.pre37 = load i16** @zptr16, align 8
> > +  %dec = add nsw i32 %arg, -1
> > +  %idxprom8 = sext i32 %dec to i64
> > +  %arrayidx9 = getelementptr inbounds i16* %.pre37, i64 %idxprom8
> > +  %tmp = trunc i64 %var to i16
> > +  store i16 %tmp, i16* %arrayidx9, align 4
> > +  ret void
> > +}
> > +
> > +define void @fct8(i32 %arg, i64 %var) {
> > +; CHECK: fct8
> > +; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr8@GOTPAGE
> > +; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]],
> _zptr8@GOTPAGEOFF]
> > +; CHECK: ldr [[BASEADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> > +; w0 is %arg
> > +; CHECK-NEXT: add [[ADDR:x[0-9]+]], [[BASEADDR]], w0, sxtw
> > +; w1 is %var truncated
> > +; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
> > +; CHECK-NEXT: ret
> > +bb:
> > +  %.pre37 = load i8** @zptr8, align 8
> > +  %dec = add nsw i32 %arg, -1
> > +  %idxprom8 = sext i32 %dec to i64
> > +  %arrayidx9 = getelementptr inbounds i8* %.pre37, i64 %idxprom8
> > +  %tmp = trunc i64 %var to i8
> > +  store i8 %tmp, i8* %arrayidx9, align 4
> > +  ret void
> > +}
> >
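
The trunc-store tests are really addressing-mode tests: the sign-extended,
scaled index and the integer truncation both fold into a single
str/strh/sturb. A hypothetical C source for @fct32 (my illustration, not from
the patch):

    extern int *zptr32;

    /* The sext of (arg - 1) and the *4 scaling end up in the store's
     * [base, wN, sxtw #2] addressing mode; the i64 -> i32 truncation is
     * just the use of w1 as the stored register. */
    void fct32(int arg, long long var) {
        zptr32[arg - 1] = (int)var;
    }
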
> > Added: llvm/trunk/test/CodeGen/ARM64/umaxv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/umaxv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/umaxv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/umaxv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,92 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp {
> > +; CHECK-LABEL: vmax_u8x8:
> > +; CHECK: umaxv.8b        b[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a)
> nounwind
> > +  %tmp = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @bar(...)
> > +
> > +define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp {
> > +; CHECK-LABEL: vmax_u4x16:
> > +; CHECK: umaxv.4h        h[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>
> %a) nounwind
> > +  %tmp = trunc i32 %vmaxv.i to i16
> > +  %tobool = icmp eq i16 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp {
> > +; CHECK-LABEL: vmax_u8x16:
> > +; CHECK: umaxv.8h        h[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>
> %a) nounwind
> > +  %tmp = trunc i32 %vmaxv.i to i16
> > +  %tobool = icmp eq i16 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp {
> > +; CHECK-LABEL: vmax_u16x8:
> > +; CHECK: umaxv.16b        b[[REG:[0-9]+]], v0
> > +; CHECK: fmov     [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>
> %a) nounwind
> > +  %tmp = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone
> >
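
The CHECK-NOT: and lines here (and in uminv.ll below) encode a known-bits
fact: the byte-wise umaxv reductions can only produce values in 0..255 (and
the halfword forms 0..65535), so truncating the i32 intrinsic result before
the zero test needs no masking. A hypothetical C analogue of @vmax_u8x8 using
the ACLE reduction intrinsic (illustration only, not from the patch):

    #include <arm_neon.h>

    int bar(void);

    int vmax_u8x8(uint8x8_t a) {
        /* The reduction result already fits in 8 bits, so the zero test
         * should compile to fmov + cbz with no extra 'and'. */
        return vmaxv_u8(a) != 0 ? bar() : 0;
    }
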
> > Added: llvm/trunk/test/CodeGen/ARM64/uminv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/uminv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/uminv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/uminv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,92 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
> > +; CHECK-LABEL: vmin_u8x8:
> > +; CHECK: uminv.8b        b[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a)
> nounwind
> > +  %tmp = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @bar(...)
> > +
> > +define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp {
> > +; CHECK-LABEL: vmin_u4x16:
> > +; CHECK: uminv.4h        h[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>
> %a) nounwind
> > +  %tmp = trunc i32 %vminv.i to i16
> > +  %tobool = icmp eq i16 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp {
> > +; CHECK-LABEL: vmin_u8x16:
> > +; CHECK: uminv.8h        h[[REG:[0-9]+]], v0
> > +; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>
> %a) nounwind
> > +  %tmp = trunc i32 %vminv.i to i16
> > +  %tobool = icmp eq i16 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp {
> > +; CHECK-LABEL: vmin_u16x8:
> > +; CHECK: uminv.16b        b[[REG:[0-9]+]], v0
> > +; CHECK: fmov     [[REG2:w[0-9]+]], s[[REG]]
> > +; CHECK-NOT: and
> > +; CHECK: cbz     [[REG2]],
> > +entry:
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>
> %a) nounwind
> > +  %tmp = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %tmp, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
> > +  br label %return
> > +
> > +return:
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) nounwind
> readnone
> > +declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/umov.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/umov.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/umov.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/umov.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,33 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define zeroext i8 @f1(<16 x i8> %a) {
> > +; CHECK-LABEL: f1:
> > +; CHECK: umov.b w0, v0[3]
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <16 x i8> %a, i32 3
> > +  ret i8 %vecext
> > +}
> > +
> > +define zeroext i16 @f2(<4 x i16> %a) {
> > +; CHECK-LABEL: f2:
> > +; CHECK: umov.h w0, v0[2]
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <4 x i16> %a, i32 2
> > +  ret i16 %vecext
> > +}
> > +
> > +define i32 @f3(<2 x i32> %a) {
> > +; CHECK-LABEL: f3:
> > +; CHECK: umov.s w0, v0[1]
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <2 x i32> %a, i32 1
> > +  ret i32 %vecext
> > +}
> > +
> > +define i64 @f4(<2 x i64> %a) {
> > +; CHECK-LABEL: f4:
> > +; CHECK: umov.d x0, v0[1]
> > +; CHECK-NEXT: ret
> > +  %vecext = extractelement <2 x i64> %a, i32 1
> > +  ret i64 %vecext
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/unaligned_ldst.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/unaligned_ldst.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/unaligned_ldst.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/unaligned_ldst.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,41 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +; rdar://11231896
> > +
> > +define void @t1(i8* nocapture %a, i8* nocapture %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK-NOT: orr
> > +; CHECK: ldr [[X0:x[0-9]+]], [x1]
> > +; CHECK: str [[X0]], [x0]
> > +  %tmp1 = bitcast i8* %b to i64*
> > +  %tmp2 = bitcast i8* %a to i64*
> > +  %tmp3 = load i64* %tmp1, align 1
> > +  store i64 %tmp3, i64* %tmp2, align 1
> > +  ret void
> > +}
> > +
> > +define void @t2(i8* nocapture %a, i8* nocapture %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK-NOT: orr
> > +; CHECK: ldr [[W0:w[0-9]+]], [x1]
> > +; CHECK: str [[W0]], [x0]
> > +  %tmp1 = bitcast i8* %b to i32*
> > +  %tmp2 = bitcast i8* %a to i32*
> > +  %tmp3 = load i32* %tmp1, align 1
> > +  store i32 %tmp3, i32* %tmp2, align 1
> > +  ret void
> > +}
> > +
> > +define void @t3(i8* nocapture %a, i8* nocapture %b) nounwind {
> > +entry:
> > +; CHECK-LABEL: t3:
> > +; CHECK-NOT: orr
> > +; CHECK: ldrh [[W0:w[0-9]+]], [x1]
> > +; CHECK: strh [[W0]], [x0]
> > +  %tmp1 = bitcast i8* %b to i16*
> > +  %tmp2 = bitcast i8* %a to i16*
> > +  %tmp3 = load i16* %tmp1, align 1
> > +  store i16 %tmp3, i16* %tmp2, align 1
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/uzp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/uzp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/uzp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/uzp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,107 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vuzpi8:
> > +;CHECK: uzp1.8b
> > +;CHECK: uzp2.8b
> > +;CHECK-NEXT: add.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
> > +       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +       ret <8 x i8> %tmp5
> > +}
> > +
> > +define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: vuzpi16:
> > +;CHECK: uzp1.4h
> > +;CHECK: uzp2.4h
> > +;CHECK-NEXT: add.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp2 = load <4 x i16>* %B
> > +       %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x
> i32> <i32 0, i32 2, i32 4, i32 6>
> > +       %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x
> i32> <i32 1, i32 3, i32 5, i32 7>
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +       ret <4 x i16> %tmp5
> > +}
> > +
> > +define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vuzpQi8:
> > +;CHECK: uzp1.16b
> > +;CHECK: uzp2.16b
> > +;CHECK-NEXT: add.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp2 = load <16 x i8>* %B
> > +       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16,
> i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
> > +       %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x
> i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17,
> i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
> > +        %tmp5 = add <16 x i8> %tmp3, %tmp4
> > +       ret <16 x i8> %tmp5
> > +}
> > +
> > +define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: vuzpQi16:
> > +;CHECK: uzp1.8h
> > +;CHECK: uzp2.8h
> > +;CHECK-NEXT: add.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = load <8 x i16>* %B
> > +       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
> > +       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +       ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: vuzpQi32:
> > +;CHECK: uzp1.4s
> > +;CHECK: uzp2.4s
> > +;CHECK-NEXT: add.4s
> > +       %tmp1 = load <4 x i32>* %A
> > +       %tmp2 = load <4 x i32>* %B
> > +       %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x
> i32> <i32 0, i32 2, i32 4, i32 6>
> > +       %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x
> i32> <i32 1, i32 3, i32 5, i32 7>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +       ret <4 x i32> %tmp5
> > +}
> > +
> > +define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
> > +;CHECK-LABEL: vuzpQf:
> > +;CHECK: uzp1.4s
> > +;CHECK: uzp2.4s
> > +;CHECK-NEXT: fadd.4s
> > +       %tmp1 = load <4 x float>* %A
> > +       %tmp2 = load <4 x float>* %B
> > +       %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x
> i32> <i32 0, i32 2, i32 4, i32 6>
> > +       %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x
> i32> <i32 1, i32 3, i32 5, i32 7>
> > +        %tmp5 = fadd <4 x float> %tmp3, %tmp4
> > +       ret <4 x float> %tmp5
> > +}
> > +
> > +; Undef shuffle indices should not prevent matching to VUZP:
> > +
> > +define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: vuzpi8_undef:
> > +;CHECK: uzp1.8b
> > +;CHECK: uzp2.8b
> > +;CHECK-NEXT: add.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp2 = load <8 x i8>* %B
> > +       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
> > +       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +       ret <8 x i8> %tmp5
> > +}
> > +
> > +define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind
> {
> > +;CHECK-LABEL: vuzpQi16_undef:
> > +;CHECK: uzp1.8h
> > +;CHECK: uzp2.8h
> > +;CHECK-NEXT: add.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp2 = load <8 x i16>* %B
> > +       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
> > +       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x
> i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +       ret <8 x i16> %tmp5
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vaargs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vaargs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vaargs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vaargs.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,20 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
> > +target triple = "arm64-apple-darwin11.0.0"
> > +
> > +define float @t1(i8* nocapture %fmt, ...) nounwind ssp {
> > +entry:
> > +; CHECK: t1
> > +; CHECK: fcvt
> > +  %argp = alloca i8*, align 8
> > +  %argp1 = bitcast i8** %argp to i8*
> > +  call void @llvm.va_start(i8* %argp1)
> > +  %0 = va_arg i8** %argp, i32
> > +  %1 = va_arg i8** %argp, float
> > +  call void @llvm.va_end(i8* %argp1)
> > +  ret float %1
> > +}
> > +
> > +declare void @llvm.va_start(i8*) nounwind
> > +
> > +declare void @llvm.va_end(i8*) nounwind
> >
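
The fcvt checked in vaargs.ll comes from the C default argument promotions:
a float passed through "..." travels as a double, so the callee reads a
double from the va_list and narrows it. A hypothetical C source for @t1 (my
illustration, not from the patch):

    #include <stdarg.h>

    float t1(const char *fmt, ...) {
        va_list ap;
        va_start(ap, fmt);
        (void)va_arg(ap, int);               /* the i32 vararg */
        float f = (float)va_arg(ap, double); /* promoted float; narrowed -> fcvt */
        va_end(ap);
        return f;
    }
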
> > Added: llvm/trunk/test/CodeGen/ARM64/vabs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vabs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vabs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vabs.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,796 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +
> > +define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: sabdl8h:
> > +;CHECK: sabdl.8h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
> > +        ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sabdl4s:
> > +;CHECK: sabdl.4s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
> > +        ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sabdl2d:
> > +;CHECK: sabdl.2d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
> > +        ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: sabdl2_8h:
> > +;CHECK: sabdl2.8h
> > +        %load1 = load <16 x i8>* %A
> > +        %load2 = load <16 x i8>* %B
> > +        %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
> > +        ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sabdl2_4s:
> > +;CHECK: sabdl2.4s
> > +        %load1 = load <8 x i16>* %A
> > +        %load2 = load <8 x i16>* %B
> > +        %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
> > +        ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sabdl2_2d:
> > +;CHECK: sabdl2.2d
> > +        %load1 = load <4 x i32>* %A
> > +        %load2 = load <4 x i32>* %B
> > +        %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
> > +        ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uabdl8h:
> > +;CHECK: uabdl.8h
> > +  %tmp1 = load <8 x i8>* %A
> > +  %tmp2 = load <8 x i8>* %B
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x
> i8> %tmp2)
> > +  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
> > +  ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uabdl4s:
> > +;CHECK: uabdl.4s
> > +  %tmp1 = load <4 x i16>* %A
> > +  %tmp2 = load <4 x i16>* %B
> > +  %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1,
> <4 x i16> %tmp2)
> > +  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
> > +  ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uabdl2d:
> > +;CHECK: uabdl.2d
> > +  %tmp1 = load <2 x i32>* %A
> > +  %tmp2 = load <2 x i32>* %B
> > +  %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1,
> <2 x i32> %tmp2)
> > +  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
> > +  ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uabdl2_8h:
> > +;CHECK: uabdl2.8h
> > +  %load1 = load <16 x i8>* %A
> > +  %load2 = load <16 x i8>* %B
> > +  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32>
> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32>
> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +
> > +  %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x
> i8> %tmp2)
> > +  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
> > +  ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uabdl2_4s:
> > +;CHECK: uabdl2.4s
> > +  %load1 = load <8 x i16>* %A
> > +  %load2 = load <8 x i16>* %B
> > +  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> > +  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> > +  %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1,
> <4 x i16> %tmp2)
> > +  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
> > +  ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uabdl2_2d:
> > +;CHECK: uabdl2.2d
> > +  %load1 = load <4 x i32>* %A
> > +  %load2 = load <4 x i32>* %B
> > +  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +  %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1,
> <2 x i32> %tmp2)
> > +  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
> > +  ret <2 x i64> %tmp4
> > +}
> > +
> > +define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
> > +;CHECK-LABEL: fabd_2s:
> > +;CHECK: fabd.2s
> > +        %tmp1 = load <2 x float>* %A
> > +        %tmp2 = load <2 x float>* %B
> > +        %tmp3 = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x
> float> %tmp1, <2 x float> %tmp2)
> > +        ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
> > +;CHECK-LABEL: fabd_4s:
> > +;CHECK: fabd.4s
> > +        %tmp1 = load <4 x float>* %A
> > +        %tmp2 = load <4 x float>* %B
> > +        %tmp3 = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x
> float> %tmp1, <4 x float> %tmp2)
> > +        ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B)
> nounwind {
> > +;CHECK-LABEL: fabd_2d:
> > +;CHECK: fabd.2d
> > +        %tmp1 = load <2 x double>* %A
> > +        %tmp2 = load <2 x double>* %B
> > +        %tmp3 = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x
> double> %tmp1, <2 x double> %tmp2)
> > +        ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x
> float>) nounwind readnone
> > +declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x
> float>) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x
> double>) nounwind readnone
> > +
> > +define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: sabd_8b:
> > +;CHECK: sabd.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: sabd_16b:
> > +;CHECK: sabd.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp2 = load <16 x i8>* %B
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>
> %tmp1, <16 x i8> %tmp2)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sabd_4h:
> > +;CHECK: sabd.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sabd_8h:
> > +;CHECK: sabd.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sabd_2s:
> > +;CHECK: sabd.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sabd_4s:
> > +;CHECK: sabd.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
> nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
> nounwind readnone
> > +declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +
> > +define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uabd_8b:
> > +;CHECK: uabd.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uabd_16b:
> > +;CHECK: uabd.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp2 = load <16 x i8>* %B
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>
> %tmp1, <16 x i8> %tmp2)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uabd_4h:
> > +;CHECK: uabd.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uabd_8h:
> > +;CHECK: uabd.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uabd_2s:
> > +;CHECK: uabd.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uabd_4s:
> > +;CHECK: uabd.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
> nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
> nounwind readnone
> > +declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +
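[Side note, not part of the patch: the sabd/uabd tests above, the widening
sabdl/uabdl tests before them, and the accumulating sabal/uabal and saba/uaba tests
further down in this file line up with the ACLE absolute-difference intrinsics, so
the same selection paths can be poked at from C. A hedged sketch, assuming clang
targeting arm64 and arm_neon.h; the function names are invented:

  #include <arm_neon.h>

  int8x8_t  abd_example(int8x8_t a, int8x8_t b)  { return vabd_s8(a, b); }  /* sabd.8b  */
  int16x8_t abdl_example(int8x8_t a, int8x8_t b) { return vabdl_s8(a, b); } /* sabdl.8h */
  int16x8_t abal_example(int16x8_t acc, int8x8_t a, int8x8_t b) {
    return vabal_s8(acc, a, b);                                             /* sabal.8h */
  }
]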
> > +define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_8b:
> > +;CHECK: sqabs.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>
> %tmp1)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_16b:
> > +;CHECK: sqabs.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>
> %tmp1)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_4h:
> > +;CHECK: sqabs.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>
> %tmp1)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_8h:
> > +;CHECK: sqabs.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>
> %tmp1)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_2s:
> > +;CHECK: sqabs.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>
> %tmp1)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: sqabs_4s:
> > +;CHECK: sqabs.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>
> %tmp1)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) nounwind
> readnone
> > +declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) nounwind
> readnone
> > +declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) nounwind
> readnone
> > +declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) nounwind
> readnone
> > +
> > +define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_8b:
> > +;CHECK: sqneg.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>
> %tmp1)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_16b:
> > +;CHECK: sqneg.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>
> %tmp1)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_4h:
> > +;CHECK: sqneg.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>
> %tmp1)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_8h:
> > +;CHECK: sqneg.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>
> %tmp1)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_2s:
> > +;CHECK: sqneg.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>
> %tmp1)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: sqneg_4s:
> > +;CHECK: sqneg.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>
> %tmp1)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) nounwind
> readnone
> > +declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) nounwind
> readnone
> > +declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) nounwind
> readnone
> > +declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) nounwind
> readnone
> > +
> > +define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: abs_8b:
> > +;CHECK: abs.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %tmp1)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: abs_16b:
> > +;CHECK: abs.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>
> %tmp1)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: abs_4h:
> > +;CHECK: abs.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>
> %tmp1)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: abs_8h:
> > +;CHECK: abs.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>
> %tmp1)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: abs_2s:
> > +;CHECK: abs.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>
> %tmp1)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: abs_4s:
> > +;CHECK: abs.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>
> %tmp1)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
> > +; CHECK-LABEL: abs_1d:
> > +; CHECK: abs d0, d0
> > +  %abs = call <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64> %A)
> > +  ret <1 x i64> %abs
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) nounwind
> readnone
> > +declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) nounwind
> readnone
> > +declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) nounwind
> readnone
> > +declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) nounwind
> readnone
> > +declare <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64>) nounwind
> readnone
> > +
> > +define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: sabal8h:
> > +;CHECK: sabal.8h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = load <8 x i16>* %C
> > +        %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4.1
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: sabal4s:
> > +;CHECK: sabal.4s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = load <4 x i32>* %C
> > +        %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4.1
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C)
> nounwind {
> > +;CHECK-LABEL: sabal2d:
> > +;CHECK: sabal.2d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = load <2 x i64>* %C
> > +        %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
> > +        %tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64>
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4.1
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>*
> %C) nounwind {
> > +;CHECK-LABEL: sabal2_8h:
> > +;CHECK: sabal2.8h
> > +        %load1 = load <16 x i8>* %A
> > +        %load2 = load <16 x i8>* %B
> > +        %tmp3 = load <8 x i16>* %C
> > +        %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4.1
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>*
> %C) nounwind {
> > +;CHECK-LABEL: sabal2_4s:
> > +;CHECK: sabal2.4s
> > +        %load1 = load <8 x i16>* %A
> > +        %load2 = load <8 x i16>* %B
> > +        %tmp3 = load <4 x i32>* %C
> > +        %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4.1
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>*
> %C) nounwind {
> > +;CHECK-LABEL: sabal2_2d:
> > +;CHECK: sabal2.2d
> > +        %load1 = load <4 x i32>* %A
> > +        %load2 = load <4 x i32>* %B
> > +        %tmp3 = load <2 x i64>* %C
> > +        %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4.1
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: uabal8h:
> > +;CHECK: uabal.8h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = load <8 x i16>* %C
> > +        %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4.1
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: uabal4s:
> > +;CHECK: uabal.4s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = load <4 x i32>* %C
> > +        %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4.1
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C)
> nounwind {
> > +;CHECK-LABEL: uabal2d:
> > +;CHECK: uabal.2d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = load <2 x i64>* %C
> > +        %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4.1
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>*
> %C) nounwind {
> > +;CHECK-LABEL: uabal2_8h:
> > +;CHECK: uabal2.8h
> > +        %load1 = load <16 x i8>* %A
> > +        %load2 = load <16 x i8>* %B
> > +        %tmp3 = load <8 x i16>* %C
> > +        %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4.1
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>*
> %C) nounwind {
> > +;CHECK-LABEL: uabal2_4s:
> > +;CHECK: uabal2.4s
> > +        %load1 = load <8 x i16>* %A
> > +        %load2 = load <8 x i16>* %B
> > +        %tmp3 = load <4 x i32>* %C
> > +        %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4.1
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>*
> %C) nounwind {
> > +;CHECK-LABEL: uabal2_2d:
> > +;CHECK: uabal2.2d
> > +        %load1 = load <4 x i32>* %A
> > +        %load2 = load <4 x i32>* %B
> > +        %tmp3 = load <2 x i64>* %C
> > +        %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4.1
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_8b:
> > +;CHECK: saba.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4 = load <8 x i8>* %C
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +        ret <8 x i8> %tmp5
> > +}
> > +
> > +define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_16b:
> > +;CHECK: saba.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp2 = load <16 x i8>* %B
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>
> %tmp1, <16 x i8> %tmp2)
> > +        %tmp4 = load <16 x i8>* %C
> > +        %tmp5 = add <16 x i8> %tmp3, %tmp4
> > +        ret <16 x i8> %tmp5
> > +}
> > +
> > +define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_4h:
> > +;CHECK: saba.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4 = load <4 x i16>* %C
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +        ret <4 x i16> %tmp5
> > +}
> > +
> > +define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_8h:
> > +;CHECK: saba.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        %tmp4 = load <8 x i16>* %C
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_2s:
> > +;CHECK: saba.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4 = load <2 x i32>* %C
> > +        %tmp5 = add <2 x i32> %tmp3, %tmp4
> > +        ret <2 x i32> %tmp5
> > +}
> > +
> > +define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: saba_4s:
> > +;CHECK: saba.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        %tmp4 = load <4 x i32>* %C
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_8b:
> > +;CHECK: uaba.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        %tmp4 = load <8 x i8>* %C
> > +        %tmp5 = add <8 x i8> %tmp3, %tmp4
> > +        ret <8 x i8> %tmp5
> > +}
> > +
> > +define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_16b:
> > +;CHECK: uaba.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp2 = load <16 x i8>* %B
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>
> %tmp1, <16 x i8> %tmp2)
> > +        %tmp4 = load <16 x i8>* %C
> > +        %tmp5 = add <16 x i8> %tmp3, %tmp4
> > +        ret <16 x i8> %tmp5
> > +}
> > +
> > +define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_4h:
> > +;CHECK: uaba.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        %tmp4 = load <4 x i16>* %C
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +        ret <4 x i16> %tmp5
> > +}
> > +
> > +define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_8h:
> > +;CHECK: uaba.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        %tmp4 = load <8 x i16>* %C
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_2s:
> > +;CHECK: uaba.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        %tmp4 = load <2 x i32>* %C
> > +        %tmp5 = add <2 x i32> %tmp3, %tmp4
> > +        ret <2 x i32> %tmp5
> > +}
> > +
> > +define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C)
> nounwind {
> > +;CHECK-LABEL: uaba_4s:
> > +;CHECK: uaba.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        %tmp4 = load <4 x i32>* %C
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +; Scalar FABD
> > +define float @fabds(float %a, float %b) nounwind {
> > +; CHECK-LABEL: fabds:
> > +; CHECK: fabd s0, s0, s1
> > +  %vabd.i = tail call float @llvm.arm64.sisd.fabd.f32(float %a, float
> %b) nounwind
> > +  ret float %vabd.i
> > +}
> > +
> > +define double @fabdd(double %a, double %b) nounwind {
> > +; CHECK-LABEL: fabdd:
> > +; CHECK: fabd d0, d0, d1
> > +  %vabd.i = tail call double @llvm.arm64.sisd.fabd.f64(double %a,
> double %b) nounwind
> > +  ret double %vabd.i
> > +}
> > +
> > +declare double @llvm.arm64.sisd.fabd.f64(double, double) nounwind
> readnone
> > +declare float @llvm.arm64.sisd.fabd.f32(float, float) nounwind readnone
> > +
> > +define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
> > +; CHECK-LABEL: uabdl_from_extract_dup:
> > +; CHECK-NOT: ext.16b
> > +; CHECK: uabdl2.2d
> > +  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
> > +  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
> > +
> > +  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %res = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>
> %lhs.high, <2 x i32> %rhsvec) nounwind
> > +  %res1 = zext <2 x i32> %res to <2 x i64>
> > +  ret <2 x i64> %res1
> > +}
> > +
> > +define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
> > +; CHECK-LABEL: sabdl_from_extract_dup:
> > +; CHECK-NOT: ext.16b
> > +; CHECK: sabdl2.2d
> > +  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
> > +  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
> > +
> > +  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %res = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>
> %lhs.high, <2 x i32> %rhsvec) nounwind
> > +  %res1 = zext <2 x i32> %res to <2 x i64>
> > +  ret <2 x i64> %res1
> > +}
> >
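[One more C-level cross-check on the file above, again not part of the patch: the
"2" (high-half) forms such as uabdl2.2d correspond to feeding the high halves of
128-bit vectors into the widening op, which is what the uabdl2_2d and
uabdl_from_extract_dup tests build with shufflevectors. A sketch under the same
assumptions as before (arm_neon.h, invented name):

  #include <arm_neon.h>

  /* Take the high <2 x i32> halves and do a widening absolute difference;
     on arm64 this should select to uabdl2.2d, as the uabdl2_2d test above
     checks. */
  uint64x2_t abdl_high_example(uint32x4_t a, uint32x4_t b) {
    return vabdl_u32(vget_high_u32(a), vget_high_u32(b));
  }
]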
> > Added: llvm/trunk/test/CodeGen/ARM64/vadd.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vadd.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vadd.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vadd.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,941 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -asm-verbose=false | FileCheck %s
> > +
> > +define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: addhn8b:
> > +;CHECK: addhn.8b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: addhn4h:
> > +;CHECK: addhn.4h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: addhn2s:
> > +;CHECK: addhn.2s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64>
> %tmp1, <2 x i64> %tmp2)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
> > +;CHECK-LABEL: addhn2_16b:
> > +;CHECK: addhn.8b
> > +;CHECK-NEXT: addhn2.16b
> > +  %vaddhn2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>
> %a, <8 x i16> %b) nounwind
> > +  %vaddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x
> i16> %a, <8 x i16> %b) nounwind
> > +  %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i,
> <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
> i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
> > +;CHECK-LABEL: addhn2_8h:
> > +;CHECK: addhn.4h
> > +;CHECK-NEXT: addhn2.8h
> > +  %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x
> i32> %a, <4 x i32> %b) nounwind
> > +  %vaddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4
> x i32> %a, <4 x i32> %b) nounwind
> > +  %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i,
> <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> > +  ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
> > +;CHECK-LABEL: addhn2_4s:
> > +;CHECK: addhn.2s
> > +;CHECK-NEXT: addhn2.4s
> > +  %vaddhn2.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x
> i64> %a, <2 x i64> %b) nounwind
> > +  %vaddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2
> x i64> %a, <2 x i64> %b) nounwind
> > +  %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i,
> <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> > +  ret <4 x i32> %res
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64>, <2 x i64>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +
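[For reference, not part of the patch: the addhn/addhn2 tests just above map onto
the narrowing "add and take high half" intrinsics. A minimal C sketch, assuming
arm_neon.h, with invented function names:

  #include <arm_neon.h>

  int8x8_t  addhn_example(int16x8_t a, int16x8_t b) { return vaddhn_s16(a, b); } /* addhn.8b */

  /* Two narrowing adds combined into one 128-bit result, the same shape the
     addhn2_16b test builds with a shufflevector; expected: addhn.8b + addhn2.16b. */
  int8x16_t addhn2_example(int16x8_t a, int16x8_t b) {
    return vcombine_s8(vaddhn_s16(a, b), vaddhn_s16(a, b));
  }
]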
> > +
> > +define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: raddhn8b:
> > +;CHECK: raddhn.8b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: raddhn4h:
> > +;CHECK: raddhn.4h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: raddhn2s:
> > +;CHECK: raddhn.2s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>
> %tmp1, <2 x i64> %tmp2)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
> > +;CHECK-LABEL: raddhn2_16b:
> > +;CHECK: raddhn.8b
> > +;CHECK-NEXT: raddhn2.16b
> > +  %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x
> i16> %a, <8 x i16> %b) nounwind
> > +  %vraddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8
> x i16> %a, <8 x i16> %b) nounwind
> > +  %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i,
> <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
> i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
> > +;CHECK-LABEL: raddhn2_8h:
> > +;CHECK: raddhn.4h
> > +;CHECK-NEXT: raddhn2.8h
> > +  %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x
> i32> %a, <4 x i32> %b) nounwind
> > +  %vraddhn_high3.i = tail call <4 x i16>
> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
> > +  %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16>
> %vraddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32
> 6, i32 7>
> > +  ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
> > +;CHECK-LABEL: raddhn2_4s:
> > +;CHECK: raddhn.2s
> > +;CHECK-NEXT: raddhn2.4s
> > +  %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x
> i64> %a, <2 x i64> %b) nounwind
> > +  %vraddhn_high3.i = tail call <2 x i32>
> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
> > +  %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32>
> %vraddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> > +  ret <4 x i32> %res
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +
> > +define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: saddl8h:
> > +;CHECK: saddl.8h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +  %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
> > +  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
> > +  %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: saddl4s:
> > +;CHECK: saddl.4s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +  %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
> > +  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
> > +  %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: saddl2d:
> > +;CHECK: saddl.2d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +  %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
> > +  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
> > +  %tmp5 = add <2 x i64> %tmp3, %tmp4
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind  {
> > +; CHECK-LABEL: saddl2_8h:
> > +; CHECK-NEXT: saddl2.8h v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <16 x i8> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
> > +  %vmovl.i.i.i = sext <8 x i8> %tmp1 to <8 x i16>
> > +  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
> > +  %vmovl.i.i5.i = sext <8 x i8> %tmp3 to <8 x i16>
> > +  %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <8 x i16> %add.i
> > +}
> > +
> > +define <4 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind  {
> > +; CHECK-LABEL: saddl2_4s:
> > +; CHECK-NEXT: saddl2.4s v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <8 x i16> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
> > +  %vmovl.i.i.i = sext <4 x i16> %tmp1 to <4 x i32>
> > +  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
> > +  %vmovl.i.i5.i = sext <4 x i16> %tmp3 to <4 x i32>
> > +  %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <4 x i32> %add.i
> > +}
> > +
> > +define <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind  {
> > +; CHECK-LABEL: saddl2_2d:
> > +; CHECK-NEXT: saddl2.2d v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <4 x i32> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
> > +  %vmovl.i.i.i = sext <2 x i32> %tmp1 to <2 x i64>
> > +  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
> > +  %vmovl.i.i5.i = sext <2 x i32> %tmp3 to <2 x i64>
> > +  %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <2 x i64> %add.i
> > +}
> > +
> > +define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uaddl8h:
> > +;CHECK: uaddl.8h
> > +  %tmp1 = load <8 x i8>* %A
> > +  %tmp2 = load <8 x i8>* %B
> > +  %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
> > +  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
> > +  %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +  ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uaddl4s:
> > +;CHECK: uaddl.4s
> > +  %tmp1 = load <4 x i16>* %A
> > +  %tmp2 = load <4 x i16>* %B
> > +  %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
> > +  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
> > +  %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +  ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uaddl2d:
> > +;CHECK: uaddl.2d
> > +  %tmp1 = load <2 x i32>* %A
> > +  %tmp2 = load <2 x i32>* %B
> > +  %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
> > +  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
> > +  %tmp5 = add <2 x i64> %tmp3, %tmp4
> > +  ret <2 x i64> %tmp5
> > +}
> > +
> > +
> > +define <8 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind  {
> > +; CHECK-LABEL: uaddl2_8h:
> > +; CHECK-NEXT: uaddl2.8h v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <16 x i8> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
> > +  %vmovl.i.i.i = zext <8 x i8> %tmp1 to <8 x i16>
> > +  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
> > +  %vmovl.i.i5.i = zext <8 x i8> %tmp3 to <8 x i16>
> > +  %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <8 x i16> %add.i
> > +}
> > +
> > +define <4 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind  {
> > +; CHECK-LABEL: uaddl2_4s:
> > +; CHECK-NEXT: uaddl2.4s v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <8 x i16> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
> > +  %vmovl.i.i.i = zext <4 x i16> %tmp1 to <4 x i32>
> > +  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
> > +  %vmovl.i.i5.i = zext <4 x i16> %tmp3 to <4 x i32>
> > +  %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <4 x i32> %add.i
> > +}
> > +
> > +define <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind  {
> > +; CHECK-LABEL: uaddl2_2d:
> > +; CHECK-NEXT: uaddl2.2d v0, v0, v1
> > +; CHECK-NEXT: ret
> > +  %tmp = bitcast <4 x i32> %a to <2 x i64>
> > +  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x
> i32> <i32 1>
> > +  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
> > +  %vmovl.i.i.i = zext <2 x i32> %tmp1 to <2 x i64>
> > +  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
> > +  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1
> x i32> <i32 1>
> > +  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
> > +  %vmovl.i.i5.i = zext <2 x i32> %tmp3 to <2 x i64>
> > +  %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
> > +  ret <2 x i64> %add.i
> > +}
> > +
> > +define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uaddw8h:
> > +;CHECK: uaddw.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +  %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
> > +  %tmp4 = add <8 x i16> %tmp1, %tmp3
> > +        ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uaddw4s:
> > +;CHECK: uaddw.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +  %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
> > +  %tmp4 = add <4 x i32> %tmp1, %tmp3
> > +        ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uaddw2d:
> > +;CHECK: uaddw.2d
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +  %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
> > +  %tmp4 = add <2 x i64> %tmp1, %tmp3
> > +        ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: uaddw2_8h:
> > +;CHECK: uaddw2.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +
> > +        %tmp2 = load <16 x i8>* %B
> > +        %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %ext2 = zext <8 x i8> %high2 to <8 x i16>
> > +
> > +        %res = add <8 x i16> %tmp1, %ext2
> > +        ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uaddw2_4s:
> > +;CHECK: uaddw2.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +
> > +        %tmp2 = load <8 x i16>* %B
> > +        %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %ext2 = zext <4 x i16> %high2 to <4 x i32>
> > +
> > +        %res = add <4 x i32> %tmp1, %ext2
> > +        ret <4 x i32> %res
> > +}
> > +
> > +define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uaddw2_2d:
> > +;CHECK: uaddw2.2d
> > +        %tmp1 = load <2 x i64>* %A
> > +
> > +        %tmp2 = load <4 x i32>* %B
> > +        %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %ext2 = zext <2 x i32> %high2 to <2 x i64>
> > +
> > +        %res = add <2 x i64> %tmp1, %ext2
> > +        ret <2 x i64> %res
> > +}
> > +
> > +define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: saddw8h:
> > +;CHECK: saddw.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
> > +        %tmp4 = add <8 x i16> %tmp1, %tmp3
> > +        ret <8 x i16> %tmp4
> > +}
> > +
> > +define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: saddw4s:
> > +;CHECK: saddw.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
> > +        %tmp4 = add <4 x i32> %tmp1, %tmp3
> > +        ret <4 x i32> %tmp4
> > +}
> > +
> > +define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: saddw2d:
> > +;CHECK: saddw.2d
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
> > +        %tmp4 = add <2 x i64> %tmp1, %tmp3
> > +        ret <2 x i64> %tmp4
> > +}
> > +
> > +define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: saddw2_8h:
> > +;CHECK: saddw2.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +
> > +        %tmp2 = load <16 x i8>* %B
> > +        %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x
> i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        %ext2 = sext <8 x i8> %high2 to <8 x i16>
> > +
> > +        %res = add <8 x i16> %tmp1, %ext2
> > +        ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: saddw2_4s:
> > +;CHECK: saddw2.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +
> > +        %tmp2 = load <8 x i16>* %B
> > +        %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> > +        %ext2 = sext <4 x i16> %high2 to <4 x i32>
> > +
> > +        %res = add <4 x i32> %tmp1, %ext2
> > +        ret <4 x i32> %res
> > +}
> > +
> > +define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: saddw2_2d:
> > +;CHECK: saddw2.2d
> > +        %tmp1 = load <2 x i64>* %A
> > +
> > +        %tmp2 = load <4 x i32>* %B
> > +        %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x
> i32> <i32 2, i32 3>
> > +        %ext2 = sext <2 x i32> %high2 to <2 x i64>
> > +
> > +        %res = add <2 x i64> %tmp1, %ext2
> > +        ret <2 x i64> %res
> > +}
> > +
> > +define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: saddlp4h:
> > +;CHECK: saddlp.4h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x
> i8> %tmp1)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: saddlp2s:
> > +;CHECK: saddlp.2s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x
> i16> %tmp1)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: saddlp1d:
> > +;CHECK: saddlp.1d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp3 = call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x
> i32> %tmp1)
> > +        ret <1 x i64> %tmp3
> > +}
> > +
> > +define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: saddlp8h:
> > +;CHECK: saddlp.8h
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16
> x i8> %tmp1)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: saddlp4s:
> > +;CHECK: saddlp.4s
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x
> i16> %tmp1)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: saddlp2d:
> > +;CHECK: saddlp.2d
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x
> i32> %tmp1)
> > +        ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <4 x i16>  @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>)
> nounwind readnone
> > +declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>)
> nounwind readnone
> > +declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>)
> nounwind readnone
> > +
> > +declare <8 x i16>  @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>)
> nounwind readnone
> > +
> > +define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp4h:
> > +;CHECK: uaddlp.4h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x
> i8> %tmp1)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp2s:
> > +;CHECK: uaddlp.2s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x
> i16> %tmp1)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp1d:
> > +;CHECK: uaddlp.1d
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp3 = call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x
> i32> %tmp1)
> > +        ret <1 x i64> %tmp3
> > +}
> > +
> > +define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp8h:
> > +;CHECK: uaddlp.8h
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16
> x i8> %tmp1)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp4s:
> > +;CHECK: uaddlp.4s
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x
> i16> %tmp1)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: uaddlp2d:
> > +;CHECK: uaddlp.2d
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x
> i32> %tmp1)
> > +        ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <4 x i16>  @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>)
> nounwind readnone
> > +declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16>)
> nounwind readnone
> > +declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>)
> nounwind readnone
> > +
> > +declare <8 x i16>  @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>)
> nounwind readnone
> > +
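[Last aside from me, again not in the patch: the saddlp/uaddlp tests above and the
accumulating sadalp/uadalp tests below correspond to the ACLE pairwise-add-long
intrinsics, so the same selection can be exercised from C roughly like this
(arm_neon.h assumed, names invented):

  #include <arm_neon.h>

  int16x4_t paddl_example(int8x8_t a)                { return vpaddl_s8(a); }      /* saddlp.4h */
  int16x4_t padal_example(int16x4_t acc, int8x8_t a) { return vpadal_s8(acc, a); } /* sadalp.4h */
]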
> > +define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sadalp4h:
> > +;CHECK: sadalp.4h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x
> i8> %tmp1)
> > +        %tmp4 = load <4 x i16>* %B
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +        ret <4 x i16> %tmp5
> > +}
> > +
> > +define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sadalp2s:
> > +;CHECK: sadalp.2s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x
> i16> %tmp1)
> > +        %tmp4 = load <2 x i32>* %B
> > +        %tmp5 = add <2 x i32> %tmp3, %tmp4
> > +        ret <2 x i32> %tmp5
> > +}
> > +
> > +define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: sadalp8h:
> > +;CHECK: sadalp.8h
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16
> x i8> %tmp1)
> > +        %tmp4 = load <8 x i16>* %B
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: sadalp4s:
> > +;CHECK: sadalp.4s
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x
> i16> %tmp1)
> > +        %tmp4 = load <4 x i32>* %B
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: sadalp2d:
> > +;CHECK: sadalp.2d
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x
> i32> %tmp1)
> > +        %tmp4 = load <2 x i64>* %B
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uadalp4h:
> > +;CHECK: uadalp.4h
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x
> i8> %tmp1)
> > +        %tmp4 = load <4 x i16>* %B
> > +        %tmp5 = add <4 x i16> %tmp3, %tmp4
> > +        ret <4 x i16> %tmp5
> > +}
> > +
> > +define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uadalp2s:
> > +;CHECK: uadalp.2s
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x
> i16> %tmp1)
> > +        %tmp4 = load <2 x i32>* %B
> > +        %tmp5 = add <2 x i32> %tmp3, %tmp4
> > +        ret <2 x i32> %tmp5
> > +}
> > +
> > +define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: uadalp8h:
> > +;CHECK: uadalp.8h
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16
> x i8> %tmp1)
> > +        %tmp4 = load <8 x i16>* %B
> > +        %tmp5 = add <8 x i16> %tmp3, %tmp4
> > +        ret <8 x i16> %tmp5
> > +}
> > +
> > +define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: uadalp4s:
> > +;CHECK: uadalp.4s
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x
> i16> %tmp1)
> > +        %tmp4 = load <4 x i32>* %B
> > +        %tmp5 = add <4 x i32> %tmp3, %tmp4
> > +        ret <4 x i32> %tmp5
> > +}
> > +
> > +define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: uadalp2d:
> > +;CHECK: uadalp.2d
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x
> i32> %tmp1)
> > +        %tmp4 = load <2 x i64>* %B
> > +        %tmp5 = add <2 x i64> %tmp3, %tmp4
> > +        ret <2 x i64> %tmp5
> > +}
> > +
> > +define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: addp_8b:
> > +;CHECK: addp.8b
> > +        %tmp1 = load <8 x i8>* %A
> > +        %tmp2 = load <8 x i8>* %B
> > +        %tmp3 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>
> %tmp1, <8 x i8> %tmp2)
> > +        ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: addp_16b:
> > +;CHECK: addp.16b
> > +        %tmp1 = load <16 x i8>* %A
> > +        %tmp2 = load <16 x i8>* %B
> > +        %tmp3 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>
> %tmp1, <16 x i8> %tmp2)
> > +        ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: addp_4h:
> > +;CHECK: addp.4h
> > +        %tmp1 = load <4 x i16>* %A
> > +        %tmp2 = load <4 x i16>* %B
> > +        %tmp3 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>
> %tmp1, <4 x i16> %tmp2)
> > +        ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: addp_8h:
> > +;CHECK: addp.8h
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %tmp3 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>
> %tmp1, <8 x i16> %tmp2)
> > +        ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: addp_2s:
> > +;CHECK: addp.2s
> > +        %tmp1 = load <2 x i32>* %A
> > +        %tmp2 = load <2 x i32>* %B
> > +        %tmp3 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>
> %tmp1, <2 x i32> %tmp2)
> > +        ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: addp_4s:
> > +;CHECK: addp.4s
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %tmp3 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>
> %tmp1, <4 x i32> %tmp2)
> > +        ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: addp_2d:
> > +;CHECK: addp.2d
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %tmp3 = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>
> %tmp1, <2 x i64> %tmp2)
> > +        ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
> nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>)
> nounwind readnone
> > +declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
> nounwind readnone
> > +
> > +define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind
> {
> > +;CHECK-LABEL: faddp_2s:
> > +;CHECK: faddp.2s
> > +        %tmp1 = load <2 x float>* %A
> > +        %tmp2 = load <2 x float>* %B
> > +        %tmp3 = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x
> float> %tmp1, <2 x float> %tmp2)
> > +        ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind
> {
> > +;CHECK-LABEL: faddp_4s:
> > +;CHECK: faddp.4s
> > +        %tmp1 = load <4 x float>* %A
> > +        %tmp2 = load <4 x float>* %B
> > +        %tmp3 = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x
> float> %tmp1, <4 x float> %tmp2)
> > +        ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B)
> nounwind {
> > +;CHECK-LABEL: faddp_2d:
> > +;CHECK: faddp.2d
> > +        %tmp1 = load <2 x double>* %A
> > +        %tmp2 = load <2 x double>* %B
> > +        %tmp3 = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x
> double> %tmp1, <2 x double> %tmp2)
> > +        ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x
> float>) nounwind readnone
> > +declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x
> float>) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x
> double>) nounwind readnone
> > +
> > +define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
> > +; CHECK-LABEL: uaddl2_duprhs
> > +; CHECK-NOT: ext.16b
> > +; CHECK: uaddl2.2d
> > +  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
> > +  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
> > +
> > +  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
> > +  %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
> > +
> > +  %res = add <2 x i64> %lhs.ext, %rhs.ext
> > +  ret <2 x i64> %res
> > +}
> > +
> > +define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
> > +; CHECK-LABEL: saddl2_duplhs
> > +; CHECK-NOT: ext.16b
> > +; CHECK: saddl2.2d
> > +  %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
> > +  %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
> > +
> > +  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
> > +  %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
> > +
> > +  %res = add <2 x i64> %lhs.ext, %rhs.ext
> > +  ret <2 x i64> %res
> > +}
> > +
> > +define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
> > +; CHECK-LABEL: usubl2_duprhs
> > +; CHECK-NOT: ext.16b
> > +; CHECK: usubl2.2d
> > +  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
> > +  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
> > +
> > +  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
> > +  %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
> > +
> > +  %res = sub <2 x i64> %lhs.ext, %rhs.ext
> > +  ret <2 x i64> %res
> > +}
> > +
> > +define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
> > +; CHECK-LABEL: ssubl2_duplhs
> > +; CHECK-NOT: ext.16b
> > +; CHECK: ssubl2.2d
> > +  %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
> > +  %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
> > +
> > +  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32>
> <i32 2, i32 3>
> > +
> > +  %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
> > +  %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
> > +
> > +  %res = sub <2 x i64> %lhs.ext, %rhs.ext
> > +  ret <2 x i64> %res
> > +}
> > +
> > +define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind
> {
> > +;CHECK-LABEL: addhn8b_natural:
> > +;CHECK: addhn.8b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %sum = add <8 x i16> %tmp1, %tmp2
> > +        %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
> i16 8, i16 8, i16 8, i16 8>
> > +        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
> > +        ret <8 x i8> %narrowed
> > +}
> > +
> > +define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B)
> nounwind {
> > +;CHECK-LABEL: addhn4h_natural:
> > +;CHECK: addhn.4h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %sum = add <4 x i32> %tmp1, %tmp2
> > +        %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32
> 16>
> > +        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
> > +        ret <4 x i16> %narrowed
> > +}
> > +
> > +define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B)
> nounwind {
> > +;CHECK-LABEL: addhn2s_natural:
> > +;CHECK: addhn.2s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %sum = add <2 x i64> %tmp1, %tmp2
> > +        %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
> > +        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
> > +        ret <2 x i32> %narrowed
> > +}
> > +
> > +define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x
> i16>* %B) nounwind {
> > +;CHECK-LABEL: addhn2_16b_natural:
> > +;CHECK: addhn2.16b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %sum = add <8 x i16> %tmp1, %tmp2
> > +        %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
> i16 8, i16 8, i16 8, i16 8>
> > +        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
> > +        %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x
> i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9,
> i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        ret <16 x i8> %res
> > +}
> > +
> > +define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x
> i32>* %B) nounwind {
> > +;CHECK-LABEL: addhn2_8h_natural:
> > +;CHECK: addhn2.8h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %sum = add <4 x i32> %tmp1, %tmp2
> > +        %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32
> 16>
> > +        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
> > +        %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x
> i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> > +        ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x
> i64>* %B) nounwind {
> > +;CHECK-LABEL: addhn2_4s_natural:
> > +;CHECK: addhn2.4s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %sum = add <2 x i64> %tmp1, %tmp2
> > +        %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
> > +        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
> > +        %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> > +        ret <4 x i32> %res
> > +}
> > +
> > +define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind
> {
> > +;CHECK-LABEL: subhn8b_natural:
> > +;CHECK: subhn.8b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %diff = sub <8 x i16> %tmp1, %tmp2
> > +        %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8,
> i16 8, i16 8, i16 8, i16 8>
> > +        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
> > +        ret <8 x i8> %narrowed
> > +}
> > +
> > +define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B)
> nounwind {
> > +;CHECK-LABEL: subhn4h_natural:
> > +;CHECK: subhn.4h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %diff = sub <4 x i32> %tmp1, %tmp2
> > +        %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32
> 16>
> > +        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
> > +        ret <4 x i16> %narrowed
> > +}
> > +
> > +define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B)
> nounwind {
> > +;CHECK-LABEL: subhn2s_natural:
> > +;CHECK: subhn.2s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %diff = sub <2 x i64> %tmp1, %tmp2
> > +        %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
> > +        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
> > +        ret <2 x i32> %narrowed
> > +}
> > +
> > +define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x
> i16>* %B) nounwind {
> > +;CHECK-LABEL: subhn2_16b_natural:
> > +;CHECK: subhn2.16b
> > +        %tmp1 = load <8 x i16>* %A
> > +        %tmp2 = load <8 x i16>* %B
> > +        %diff = sub <8 x i16> %tmp1, %tmp2
> > +        %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8,
> i16 8, i16 8, i16 8, i16 8>
> > +        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
> > +        %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x
> i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9,
> i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
> > +        ret <16 x i8> %res
> > +}
> > +
> > +define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x
> i32>* %B) nounwind {
> > +;CHECK-LABEL: subhn2_8h_natural:
> > +;CHECK: subhn2.8h
> > +        %tmp1 = load <4 x i32>* %A
> > +        %tmp2 = load <4 x i32>* %B
> > +        %diff = sub <4 x i32> %tmp1, %tmp2
> > +        %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32
> 16>
> > +        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
> > +        %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x
> i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> > +        ret <8 x i16> %res
> > +}
> > +
> > +define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x
> i64>* %B) nounwind {
> > +;CHECK-LABEL: subhn2_4s_natural:
> > +;CHECK: subhn2.4s
> > +        %tmp1 = load <2 x i64>* %A
> > +        %tmp2 = load <2 x i64>* %B
> > +        %diff = sub <2 x i64> %tmp1, %tmp2
> > +        %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
> > +        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
> > +        %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> > +        ret <4 x i32> %res
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vaddlv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vaddlv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vaddlv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vaddlv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,26 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone {
> > +; CHECK: test_vaddlv_s32
> > +; CHECK: saddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
> > +; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddlv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>
> %a1) nounwind
> > +  ret i64 %vaddlv.i
> > +}
> > +
> > +define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone {
> > +; CHECK: test_vaddlv_u32
> > +; CHECK: uaddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
> > +; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddlv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>
> %a1) nounwind
> > +  ret i64 %vaddlv.i
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind
> readnone
> > +
> > +declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind
> readnone
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vaddv.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vaddv.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vaddv.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vaddv.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,233 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
> > +; CHECK-LABEL: test_vaddv_s8:
> > +; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a1)
> > +  %0 = trunc i32 %vaddv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vaddv_s16(<4 x i16> %a1) {
> > +; CHECK-LABEL: test_vaddv_s16:
> > +; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vaddv_s32(<2 x i32> %a1) {
> > +; CHECK-LABEL: test_vaddv_s32:
> > +; 2 x i32 is not supported by the ISA, thus, this is a special case
> > +; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>
> %a1)
> > +  ret i32 %vaddv.i
> > +}
> > +
> > +define i64 @test_vaddv_s64(<2 x i64> %a1) {
> > +; CHECK-LABEL: test_vaddv_s64:
> > +; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
> > +; CHECK-NEXT: fmov x0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64>
> %a1)
> > +  ret i64 %vaddv.i
> > +}
> > +
> > +define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) {
> > +; CHECK-LABEL: test_vaddv_u8:
> > +; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
> > +  %0 = trunc i32 %vaddv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define i32 @test_vaddv_u8_masked(<8 x i8> %a1) {
> > +; CHECK-LABEL: test_vaddv_u8_masked:
> > +; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
> > +  %0 = and i32 %vaddv.i, 511 ; 0x1ff
> > +  ret i32 %0
> > +}
> > +
> > +define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) {
> > +; CHECK-LABEL: test_vaddv_u16:
> > +; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vaddv_u16_masked(<4 x i16> %a1) {
> > +; CHECK-LABEL: test_vaddv_u16_masked:
> > +; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>
> %a1)
> > +  %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff
> > +  ret i32 %0
> > +}
> > +
> > +define i32 @test_vaddv_u32(<2 x i32> %a1) {
> > +; CHECK-LABEL: test_vaddv_u32:
> > +; 2 x i32 is not supported by the ISA, thus, this is a special case
> > +; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32>
> %a1)
> > +  ret i32 %vaddv.i
> > +}
> > +
> > +define float @test_vaddv_f32(<2 x float> %a1) {
> > +; CHECK-LABEL: test_vaddv_f32:
> > +; CHECK: faddp.2s s0, v0
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v2f32(<2 x
> float> %a1)
> > +  ret float %vaddv.i
> > +}
> > +
> > +define float @test_vaddv_v4f32(<4 x float> %a1) {
> > +; CHECK-LABEL: test_vaddv_v4f32:
> > +; CHECK: faddp.4s [[REGNUM:v[0-9]+]], v0, v0
> > +; CHECK: faddp.2s s0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v4f32(<4 x
> float> %a1)
> > +  ret float %vaddv.i
> > +}
> > +
> > +define double @test_vaddv_f64(<2 x double> %a1) {
> > +; CHECK-LABEL: test_vaddv_f64:
> > +; CHECK: faddp.2d d0, v0
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call double @llvm.arm64.neon.faddv.f64.v2f64(<2 x
> double> %a1)
> > +  ret double %vaddv.i
> > +}
> > +
> > +define i64 @test_vaddv_u64(<2 x i64> %a1) {
> > +; CHECK-LABEL: test_vaddv_u64:
> > +; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
> > +; CHECK-NEXT: fmov x0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64>
> %a1)
> > +  ret i64 %vaddv.i
> > +}
> > +
> > +define signext i8 @test_vaddvq_s8(<16 x i8> %a1) {
> > +; CHECK-LABEL: test_vaddvq_s8:
> > +; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define signext i16 @test_vaddvq_s16(<8 x i16> %a1) {
> > +; CHECK-LABEL: test_vaddvq_s16:
> > +; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vaddvq_s32(<4 x i32> %a1) {
> > +; CHECK-LABEL: test_vaddvq_s32:
> > +; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>
> %a1)
> > +  ret i32 %vaddv.i
> > +}
> > +
> > +define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) {
> > +; CHECK-LABEL: test_vaddvq_u8:
> > +; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i8
> > +  ret i8 %0
> > +}
> > +
> > +define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) {
> > +; CHECK-LABEL: test_vaddvq_u16:
> > +; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w0, s[[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16>
> %a1)
> > +  %0 = trunc i32 %vaddv.i to i16
> > +  ret i16 %0
> > +}
> > +
> > +define i32 @test_vaddvq_u32(<4 x i32> %a1) {
> > +; CHECK-LABEL: test_vaddvq_u32:
> > +; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
> > +; CHECK-NEXT: fmov [[FMOVRES:w[0-9]+]], [[REGNUM]]
> > +; CHECK-NEXT: ret
> > +entry:
> > +  %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32>
> %a1)
> > +  ret i32 %vaddv.i
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32>)
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16>)
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>)
> > +
> > +declare i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64>)
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32>)
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>)
> > +
> > +declare i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>)
> > +
> > +declare i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>)
> > +
> > +declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>)
> > +
> > +declare float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1)
> > +declare float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1)
> > +declare double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1)
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/variadic-aapcs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/variadic-aapcs.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/variadic-aapcs.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/variadic-aapcs.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,143 @@
> > +; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu
> -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s
> > +
> > +%va_list = type {i8*, i8*, i8*, i32, i32}
> > +
> > +@var = global %va_list zeroinitializer, align 8
> > +
> > +declare void @llvm.va_start(i8*)
> > +
> > +define void @test_simple(i32 %n, ...) {
> > +; CHECK-LABEL: test_simple:
> > +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
> > +; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
> > +
> > +; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
> > +
> > +; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
> > +; ... omit middle ones ...
> > +; CHECK: str x7, [sp, #
> > +
> > +; CHECK: stp q0, q1, [sp]
> > +; ... omit middle ones ...
> > +; CHECK: stp q6, q7, [sp, #
> > +
> > +; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
> > +
> > +; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
> > +; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
> > +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
> > +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
> > +
> > +; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
> > +; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
> > +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
> > +
> > +; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
> > +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
> > +
> > +; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
> > +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
> > +
> > +  %addr = bitcast %va_list* @var to i8*
> > +  call void @llvm.va_start(i8* %addr)
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
> > +; CHECK-LABEL: test_fewargs:
> > +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
> > +; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
> > +
> > +; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
> > +
> > +; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
> > +; ... omit middle ones ...
> > +; CHECK: str x7, [sp, #
> > +
> > +; CHECK: stp q1, q2, [sp]
> > +; ... omit middle ones ...
> > +; CHECK: str q7, [sp, #
> > +
> > +; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
> > +
> > +; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
> > +; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
> > +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
> > +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
> > +
> > +; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
> > +; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
> > +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
> > +
> > +; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
> > +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
> > +
> > +; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
> > +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
> > +
> > +  %addr = bitcast %va_list* @var to i8*
> > +  call void @llvm.va_start(i8* %addr)
> > +
> > +  ret void
> > +}
> > +
> > +define void @test_nospare([8 x i64], [8 x float], ...) {
> > +; CHECK-LABEL: test_nospare:
> > +
> > +  %addr = bitcast %va_list* @var to i8*
> > +  call void @llvm.va_start(i8* %addr)
> > +; CHECK-NOT: sub sp, sp
> > +; CHECK: mov [[STACK:x[0-9]+]], sp
> > +; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  ret void
> > +}
> > +
> > +; If there are non-variadic arguments on the stack (here two i64s) then
> the
> > +; __stack field should point just past them.
> > +define void @test_offsetstack([10 x i64], [3 x float], ...) {
> > +; CHECK-LABEL: test_offsetstack:
> > +; CHECK: sub sp, sp, #80
> > +; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
> > +; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
> > +
> > +  %addr = bitcast %va_list* @var to i8*
> > +  call void @llvm.va_start(i8* %addr)
> > +  ret void
> > +}
> > +
> > +declare void @llvm.va_end(i8*)
> > +
> > +define void @test_va_end() nounwind {
> > +; CHECK-LABEL: test_va_end:
> > +; CHECK-NEXT: BB#0
> > +
> > +  %addr = bitcast %va_list* @var to i8*
> > +  call void @llvm.va_end(i8* %addr)
> > +
> > +  ret void
> > +; CHECK-NEXT: ret
> > +}
> > +
> > +declare void @llvm.va_copy(i8* %dest, i8* %src)
> > +
> > +@second_list = global %va_list zeroinitializer
> > +
> > +define void @test_va_copy() {
> > +; CHECK-LABEL: test_va_copy:
> > +  %srcaddr = bitcast %va_list* @var to i8*
> > +  %dstaddr = bitcast %va_list* @second_list to i8*
> > +  call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
> > +
> > +; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
> > +
> > +; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
> > +; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
> > +; CHECK: str [[BLOCK]], [x[[DST]]]
> > +
> > +; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]], #16]
> > +; CHECK: str [[BLOCK]], [x[[DST]], #16]
> > +  ret void
> > +; CHECK: ret
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vbitwise.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vbitwise.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vbitwise.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vbitwise.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,91 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: rbit_8b:
> > +;CHECK: rbit.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp3 = call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %tmp1)
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: rbit_16b:
> > +;CHECK: rbit.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp3 = call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>
> %tmp1)
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) nounwind
> readnone
> > +
> > +define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: sxtl8h:
> > +;CHECK: sshll.8h
> > +       %tmp1 = load <8 x i8>* %A
> > +  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
> > +  ret <8 x i16> %tmp2
> > +}
> > +
> > +define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: uxtl8h:
> > +;CHECK: ushll.8h
> > +       %tmp1 = load <8 x i8>* %A
> > +  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
> > +  ret <8 x i16> %tmp2
> > +}
> > +
> > +define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: sxtl4s:
> > +;CHECK: sshll.4s
> > +       %tmp1 = load <4 x i16>* %A
> > +  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
> > +  ret <4 x i32> %tmp2
> > +}
> > +
> > +define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: uxtl4s:
> > +;CHECK: ushll.4s
> > +       %tmp1 = load <4 x i16>* %A
> > +  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
> > +  ret <4 x i32> %tmp2
> > +}
> > +
> > +define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: sxtl2d:
> > +;CHECK: sshll.2d
> > +       %tmp1 = load <2 x i32>* %A
> > +  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
> > +  ret <2 x i64> %tmp2
> > +}
> > +
> > +define <2 x i64> @uxtl2d(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: uxtl2d:
> > +;CHECK: ushll.2d
> > +       %tmp1 = load <2 x i32>* %A
> > +  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
> > +  ret <2 x i64> %tmp2
> > +}
> > +
> > +; Check for incorrect use of vector bic.
> > +; rdar://11553859
> > +define void @test_vsliq(i8* nocapture %src, i8* nocapture %dest)
> nounwind noinline ssp {
> > +entry:
> > +; CHECK-LABEL: test_vsliq:
> > +; CHECK-NOT: bic
> > +; CHECK: movi.2d [[REG1:v[0-9]+]], #0x0000ff000000ff
> > +; CHECK: and.16b v{{[0-9]+}}, v{{[0-9]+}}, [[REG1]]
> > +  %0 = bitcast i8* %src to <16 x i8>*
> > +  %1 = load <16 x i8>* %0, align 16
> > +  %and.i = and <16 x i8> %1, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8
> 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
> > +  %2 = bitcast <16 x i8> %and.i to <8 x i16>
> > +  %vshl_n = shl <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8, i16 8>
> > +  %3 = or <8 x i16> %2, %vshl_n
> > +  %4 = bitcast <8 x i16> %3 to <4 x i32>
> > +  %vshl_n8 = shl <4 x i32> %4, <i32 16, i32 16, i32 16, i32 16>
> > +  %5 = or <4 x i32> %4, %vshl_n8
> > +  %6 = bitcast <4 x i32> %5 to <16 x i8>
> > +  %7 = bitcast i8* %dest to <16 x i8>*
> > +  store <16 x i8> %6, <16 x i8>* %7, align 16
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vclz.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vclz.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vclz.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vclz.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,109 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +
> > +define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_u8:
> > +  ; CHECK: clz.8b v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
> nounwind
> > +  ret <8 x i8> %vclz.i
> > +}
> > +
> > +define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_s8:
> > +  ; CHECK: clz.8b v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
> nounwind
> > +  ret <8 x i8> %vclz.i
> > +}
> > +
> > +define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_u16:
> > +  ; CHECK: clz.4h v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1
> false) nounwind
> > +  ret <4 x i16> %vclz1.i
> > +}
> > +
> > +define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_s16:
> > +  ; CHECK: clz.4h v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1
> false) nounwind
> > +  ret <4 x i16> %vclz1.i
> > +}
> > +
> > +define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_u32:
> > +  ; CHECK: clz.2s v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1
> false) nounwind
> > +  ret <2 x i32> %vclz1.i
> > +}
> > +
> > +define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclz_s32:
> > +  ; CHECK: clz.2s v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1
> false) nounwind
> > +  ret <2 x i32> %vclz1.i
> > +}
> > +
> > +define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_u8:
> > +  ; CHECK: clz.16b v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1
> false) nounwind
> > +  ret <16 x i8> %vclz.i
> > +}
> > +
> > +define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_s8:
> > +  ; CHECK: clz.16b v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1
> false) nounwind
> > +  ret <16 x i8> %vclz.i
> > +}
> > +
> > +define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_u16:
> > +  ; CHECK: clz.8h v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1
> false) nounwind
> > +  ret <8 x i16> %vclz1.i
> > +}
> > +
> > +define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_s16:
> > +  ; CHECK: clz.8h v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1
> false) nounwind
> > +  ret <8 x i16> %vclz1.i
> > +}
> > +
> > +define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_u32:
> > +  ; CHECK: clz.4s v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1
> false) nounwind
> > +  ret <4 x i32> %vclz1.i
> > +}
> > +
> > +define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
> > +  ; CHECK-LABEL: test_vclzq_s32:
> > +  ; CHECK: clz.4s v0, v0
> > +  ; CHECK-NEXT: ret
> > +  %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1
> false) nounwind
> > +  ret <4 x i32> %vclz1.i
> > +}
> > +
> > +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
> > +
> > +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
> > +
> > +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
> > +
> > +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
> > +
> > +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
> > +
> > +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcmp.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcmp.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,227 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +
> > +define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
> > +;CHECK-LABEL: fcmltz_4s:
> > +;CHECK: fcmlt.4s [[REG:v[0-9]+]], v0, #0
> > +;CHECK-NEXT: xtn.4h v[[REG_1:[0-9]+]], [[REG]]
> > +;CHECK-NEXT: str d[[REG_1]], [x0]
> > +;CHECK-NEXT: ret
> > +  %tmp = fcmp olt <4 x float> %a, zeroinitializer
> > +  %tmp2 = sext <4 x i1> %tmp to <4 x i16>
> > +  store <4 x i16> %tmp2, <4 x i16>* %p, align 8
> > +  ret void
> > +}
> > +
> > +define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
> > +;CHECK-LABEL: facge_2s:
> > +;CHECK: facge.2s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = load <2 x float>* %B
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x
> float> %tmp1, <2 x float> %tmp2)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
> > +;CHECK-LABEL: facge_4s:
> > +;CHECK: facge.4s
> > +       %tmp1 = load <4 x float>* %A
> > +       %tmp2 = load <4 x float>* %B
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x
> float> %tmp1, <4 x float> %tmp2)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind
> {
> > +;CHECK-LABEL: facge_2d:
> > +;CHECK: facge.2d
> > +       %tmp1 = load <2 x double>* %A
> > +       %tmp2 = load <2 x double>* %B
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x
> double> %tmp1, <2 x double> %tmp2)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float>, <2 x
> float>) nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float>, <4 x
> float>) nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double>, <2 x
> double>) nounwind readnone
> > +
> > +define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
> > +;CHECK-LABEL: facgt_2s:
> > +;CHECK: facgt.2s
> > +       %tmp1 = load <2 x float>* %A
> > +       %tmp2 = load <2 x float>* %B
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x
> float> %tmp1, <2 x float> %tmp2)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
> > +;CHECK-LABEL: facgt_4s:
> > +;CHECK: facgt.4s
> > +       %tmp1 = load <4 x float>* %A
> > +       %tmp2 = load <4 x float>* %B
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x
> float> %tmp1, <4 x float> %tmp2)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind
> {
> > +;CHECK-LABEL: facgt_2d:
> > +;CHECK: facgt.2d
> > +       %tmp1 = load <2 x double>* %A
> > +       %tmp2 = load <2 x double>* %B
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x
> double> %tmp1, <2 x double> %tmp2)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x
> float>) nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x
> float>) nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x
> double>) nounwind readnone
> > +
> > +define i32 @facge_s(float %A, float %B) nounwind {
> > +; CHECK-LABEL: facge_s:
> > +; CHECK: facge {{s[0-9]+}}, s0, s1
> > +  %mask = call i32 @llvm.arm64.neon.facge.i32.f32(float %A, float %B)
> > +  ret i32 %mask
> > +}
> > +
> > +define i64 @facge_d(double %A, double %B) nounwind {
> > +; CHECK-LABEL: facge_d:
> > +; CHECK: facge {{d[0-9]+}}, d0, d1
> > +  %mask = call i64 @llvm.arm64.neon.facge.i64.f64(double %A, double %B)
> > +  ret i64 %mask
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.facge.i64.f64(double, double)
> > +declare i32 @llvm.arm64.neon.facge.i32.f32(float, float)
> > +
> > +define i32 @facgt_s(float %A, float %B) nounwind {
> > +; CHECK-LABEL: facgt_s:
> > +; CHECK: facgt {{s[0-9]+}}, s0, s1
> > +  %mask = call i32 @llvm.arm64.neon.facgt.i32.f32(float %A, float %B)
> > +  ret i32 %mask
> > +}
> > +
> > +define i64 @facgt_d(double %A, double %B) nounwind {
> > +; CHECK-LABEL: facgt_d:
> > +; CHECK: facgt {{d[0-9]+}}, d0, d1
> > +  %mask = call i64 @llvm.arm64.neon.facgt.i64.f64(double %A, double %B)
> > +  ret i64 %mask
> > +}
> > +
> > +declare i64 @llvm.arm64.neon.facgt.i64.f64(double, double)
> > +declare i32 @llvm.arm64.neon.facgt.i32.f32(float, float)
> > +
> > +define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_8b:
> > +;CHECK: cmtst.8b
> > +  %tmp1 = load <8 x i8>* %A
> > +  %tmp2 = load <8 x i8>* %B
> > +  %commonbits = and <8 x i8> %tmp1, %tmp2
> > +  %mask = icmp ne <8 x i8> %commonbits, zeroinitializer
> > +  %res = sext <8 x i1> %mask to <8 x i8>
> > +  ret <8 x i8> %res
> > +}
> > +
> > +define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_16b:
> > +;CHECK: cmtst.16b
> > +  %tmp1 = load <16 x i8>* %A
> > +  %tmp2 = load <16 x i8>* %B
> > +  %commonbits = and <16 x i8> %tmp1, %tmp2
> > +  %mask = icmp ne <16 x i8> %commonbits, zeroinitializer
> > +  %res = sext <16 x i1> %mask to <16 x i8>
> > +  ret <16 x i8> %res
> > +}
> > +
> > +define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_4h:
> > +;CHECK: cmtst.4h
> > +  %tmp1 = load <4 x i16>* %A
> > +  %tmp2 = load <4 x i16>* %B
> > +  %commonbits = and <4 x i16> %tmp1, %tmp2
> > +  %mask = icmp ne <4 x i16> %commonbits, zeroinitializer
> > +  %res = sext <4 x i1> %mask to <4 x i16>
> > +  ret <4 x i16> %res
> > +}
> > +
> > +define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_8h:
> > +;CHECK: cmtst.8h
> > +  %tmp1 = load <8 x i16>* %A
> > +  %tmp2 = load <8 x i16>* %B
> > +  %commonbits = and <8 x i16> %tmp1, %tmp2
> > +  %mask = icmp ne <8 x i16> %commonbits, zeroinitializer
> > +  %res = sext <8 x i1> %mask to <8 x i16>
> > +  ret <8 x i16> %res
> > +}
> > +
> > +define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_2s:
> > +;CHECK: cmtst.2s
> > +  %tmp1 = load <2 x i32>* %A
> > +  %tmp2 = load <2 x i32>* %B
> > +  %commonbits = and <2 x i32> %tmp1, %tmp2
> > +  %mask = icmp ne <2 x i32> %commonbits, zeroinitializer
> > +  %res = sext <2 x i1> %mask to <2 x i32>
> > +  ret <2 x i32> %res
> > +}
> > +
> > +define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_4s:
> > +;CHECK: cmtst.4s
> > +  %tmp1 = load <4 x i32>* %A
> > +  %tmp2 = load <4 x i32>* %B
> > +  %commonbits = and <4 x i32> %tmp1, %tmp2
> > +  %mask = icmp ne <4 x i32> %commonbits, zeroinitializer
> > +  %res = sext <4 x i1> %mask to <4 x i32>
> > +  ret <4 x i32> %res
> > +}
> > +
> > +define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
> > +;CHECK-LABEL: cmtst_2d:
> > +;CHECK: cmtst.2d
> > +  %tmp1 = load <2 x i64>* %A
> > +  %tmp2 = load <2 x i64>* %B
> > +  %commonbits = and <2 x i64> %tmp1, %tmp2
> > +  %mask = icmp ne <2 x i64> %commonbits, zeroinitializer
> > +  %res = sext <2 x i1> %mask to <2 x i64>
> > +  ret <2 x i64> %res
> > +}
> > +
> > +define <1 x i64> @fcmeq_d(<1 x double> %A, <1 x double> %B) nounwind {
> > +; CHECK-LABEL: fcmeq_d:
> > +; CHECK: fcmeq {{d[0-9]+}}, d0, d1
> > +  %tst = fcmp oeq <1 x double> %A, %B
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmge_d(<1 x double> %A, <1 x double> %B) nounwind {
> > +; CHECK-LABEL: fcmge_d:
> > +; CHECK: fcmge {{d[0-9]+}}, d0, d1
> > +  %tst = fcmp oge <1 x double> %A, %B
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmle_d(<1 x double> %A, <1 x double> %B) nounwind {
> > +; CHECK-LABEL: fcmle_d:
> > +; CHECK: fcmge {{d[0-9]+}}, d1, d0
> > +  %tst = fcmp ole <1 x double> %A, %B
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmgt_d(<1 x double> %A, <1 x double> %B) nounwind {
> > +; CHECK-LABEL: fcmgt_d:
> > +; CHECK: fcmgt {{d[0-9]+}}, d0, d1
> > +  %tst = fcmp ogt <1 x double> %A, %B
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> > +
> > +define <1 x i64> @fcmlt_d(<1 x double> %A, <1 x double> %B) nounwind {
> > +; CHECK-LABEL: fcmlt_d:
> > +; CHECK: fcmgt {{d[0-9]+}}, d1, d0
> > +  %tst = fcmp olt <1 x double> %A, %B
> > +  %mask = sext <1 x i1> %tst to <1 x i64>
> > +  ret <1 x i64> %mask
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcnt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcnt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcnt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcnt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,56 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
> > +;CHECK-LABEL: cls_8b:
> > +;CHECK: cls.8b
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp3 = call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %tmp1)
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
> > +;CHECK-LABEL: cls_16b:
> > +;CHECK: cls.16b
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp3 = call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>
> %tmp1)
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
> > +;CHECK-LABEL: cls_4h:
> > +;CHECK: cls.4h
> > +       %tmp1 = load <4 x i16>* %A
> > +       %tmp3 = call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>
> %tmp1)
> > +       ret <4 x i16> %tmp3
> > +}
> > +
> > +define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
> > +;CHECK-LABEL: cls_8h:
> > +;CHECK: cls.8h
> > +       %tmp1 = load <8 x i16>* %A
> > +       %tmp3 = call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>
> %tmp1)
> > +       ret <8 x i16> %tmp3
> > +}
> > +
> > +define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
> > +;CHECK-LABEL: cls_2s:
> > +;CHECK: cls.2s
> > +       %tmp1 = load <2 x i32>* %A
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>
> %tmp1)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind {
> > +;CHECK-LABEL: cls_4s:
> > +;CHECK: cls.4s
> > +       %tmp1 = load <4 x i32>* %A
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>
> %tmp1)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) nounwind readnone
> > +declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) nounwind
> readnone
> > +declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) nounwind
> readnone
> > +declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) nounwind
> readnone
> > +declare <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>) nounwind
> readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcombine.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcombine.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcombine.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcombine.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,17 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +; LowerCONCAT_VECTORS() was reversing the order of two parts.
> > +; rdar://11558157
> > +; rdar://11559553
> > +define <16 x i8> @test(<16 x i8> %q0, <16 x i8> %q1, i8* nocapture
> %dest) nounwind {
> > +entry:
> > +; CHECK-LABEL: test:
> > +; CHECK: ins.d v0[1], v1[0]
> > +  %0 = bitcast <16 x i8> %q0 to <2 x i64>
> > +  %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32>
> zeroinitializer
> > +  %1 = bitcast <16 x i8> %q1 to <2 x i64>
> > +  %shuffle.i4 = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32>
> zeroinitializer
> > +  %shuffle.i3 = shufflevector <1 x i64> %shuffle.i, <1 x i64>
> %shuffle.i4, <2 x i32> <i32 0, i32 1>
> > +  %2 = bitcast <2 x i64> %shuffle.i3 to <16 x i8>
> > +  ret <16 x i8> %2
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvt.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvt.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvt.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvt.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,686 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtas_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtas.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtas_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtas.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtas_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtas.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtau_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtau.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtau_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtau.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtau_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtau.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtms_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtms.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtms_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtms.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtms_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtms.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtmu.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtmu.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtmu_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtmu.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtps_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtps.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtps_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtps.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtps_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtps.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtpu.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtpu.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtpu_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtpu.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtns_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtns.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtns_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtns.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtns_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtns.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtnu.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x
> float> %A)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtnu.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x
> float> %A)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtnu_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtnu.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x
> double> %A)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>)
> nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>)
> nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptosi <2 x float> %A to <2 x i32>
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptosi <4 x float> %A to <4 x i32>
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtzs_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptosi <2 x double> %A to <2 x i64>
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +
> > +define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptoui <2 x float> %A to <2 x i32>
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptoui <4 x float> %A to <4 x i32>
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtzu_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = fptoui <2 x double> %A to <2 x i64>
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +define <2 x float> @frinta_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frinta_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinta.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frinta_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frinta_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinta.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frinta_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frinta_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinta.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone
> > +
> > +define <2 x float> @frinti_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frinti_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinti.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frinti_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frinti_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinti.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frinti_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frinti_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frinti.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) nounwind
> readnone
> > +
> > +define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frintm_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintm.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frintm_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintm.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frintm_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frintm_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintm.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
> > +
> > +define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frintn_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintn.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x
> float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frintn_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintn.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x
> float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frintn_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintn.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x
> double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) nounwind
> readnone
> > +declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) nounwind
> readnone
> > +declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frintp_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintp.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frintp_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frintp_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintp.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frintp_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frintp_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintp.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
> > +
> > +define <2 x float> @frintx_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frintx_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintx.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.rint.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frintx_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frintx_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintx.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.rint.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frintx_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frintx_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintx.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.rint.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.rint.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind readnone
> > +
> > +define <2 x float> @frintz_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: frintz_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintz.2s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @frintz_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: frintz_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintz.4s v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @frintz_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: frintz_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: frintz.2d v0, v0
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
> > +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
> > +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
> > +
> > +define <2 x float> @fcvtxn_2s(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtxn_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtxn v0.2s, v0.2d
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2
> x double> %A)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @fcvtxn_4s(<2 x float> %ret, <2 x double> %A)
> nounwind {
> > +;CHECK-LABEL: fcvtxn_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtxn2 v0.4s, v1.2d
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2
> x double> %A)
> > +        %res = shufflevector <2 x float> %ret, <2 x float> %tmp3, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> > +       ret <4 x float> %res
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define <2 x i32> @fcvtzsc_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzsc_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.2s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32>
> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtzsc_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzsc_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.4s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32>
> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtzsc_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtzsc_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzs.2d v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64>
> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>,
> i32) nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>,
> i32) nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>,
> i32) nounwind readnone
> > +
> > +define <2 x i32> @fcvtzuc_2s(<2 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzuc_2s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.2s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i32>
> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1)
> > +       ret <2 x i32> %tmp3
> > +}
> > +
> > +define <4 x i32> @fcvtzuc_4s(<4 x float> %A) nounwind {
> > +;CHECK-LABEL: fcvtzuc_4s:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.4s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x i32>
> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1)
> > +       ret <4 x i32> %tmp3
> > +}
> > +
> > +define <2 x i64> @fcvtzuc_2d(<2 x double> %A) nounwind {
> > +;CHECK-LABEL: fcvtzuc_2d:
> > +;CHECK-NOT: ld1
> > +;CHECK: fcvtzu.2d v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x i64>
> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1)
> > +       ret <2 x i64> %tmp3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>,
> i32) nounwind readnone
> > +declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>,
> i32) nounwind readnone
> > +declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>,
> i32) nounwind readnone
> > +
> > +define <2 x float> @scvtf_2sc(<2 x i32> %A) nounwind {
> > +;CHECK-LABEL: scvtf_2sc:
> > +;CHECK-NOT: ld1
> > +;CHECK: scvtf.2s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float>
> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @scvtf_4sc(<4 x i32> %A) nounwind {
> > +;CHECK-LABEL: scvtf_4sc:
> > +;CHECK-NOT: ld1
> > +;CHECK: scvtf.4s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float>
> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @scvtf_2dc(<2 x i64> %A) nounwind {
> > +;CHECK-LABEL: scvtf_2dc:
> > +;CHECK-NOT: ld1
> > +;CHECK: scvtf.2d v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double>
> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>,
> i32) nounwind readnone
> > +declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>,
> i32) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>,
> i32) nounwind readnone
> > +
> > +define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind {
> > +;CHECK-LABEL: ucvtf_2sc:
> > +;CHECK-NOT: ld1
> > +;CHECK: ucvtf.2s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x float>
> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
> > +       ret <2 x float> %tmp3
> > +}
> > +
> > +define <4 x float> @ucvtf_4sc(<4 x i32> %A) nounwind {
> > +;CHECK-LABEL: ucvtf_4sc:
> > +;CHECK-NOT: ld1
> > +;CHECK: ucvtf.4s v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <4 x float>
> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
> > +       ret <4 x float> %tmp3
> > +}
> > +
> > +define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind {
> > +;CHECK-LABEL: ucvtf_2dc:
> > +;CHECK-NOT: ld1
> > +;CHECK: ucvtf.2d v0, v0, #1
> > +;CHECK-NEXT: ret
> > +       %tmp3 = call <2 x double>
> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
> > +       ret <2 x double> %tmp3
> > +}
> > +
> > +
> > +;CHECK-LABEL: autogen_SD28458:
> > +;CHECK: fcvt
> > +;CHECK: ret
> > +define void @autogen_SD28458() {
> > +  %Tr53 = fptrunc <8 x double> undef to <8 x float>
> > +  store <8 x float> %Tr53, <8 x float>* undef
> > +  ret void
> > +}
> > +
> > +;CHECK-LABEL: autogen_SD19225:
> > +;CHECK: fcvt
> > +;CHECK: ret
> > +define void @autogen_SD19225() {
> > +  %A = load <8 x float>* undef
> > +  %Tr53 = fpext <8 x float> %A to <8 x double>
> > +  store <8 x double> %Tr53, <8 x double>* undef
> > +  ret void
> > +}
> > +
> > +declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>,
> i32) nounwind readnone
> > +declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>,
> i32) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>,
> i32) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvt_f.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvt_f.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvt_f.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvt_f.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,82 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind
> readnone ssp {
> > +; CHECK-LABEL: test_vcvt_f64_f32:
> > +  %vcvt1.i = fpext <2 x float> %x to <2 x double>
> > +; CHECK: fcvtl v0.2d, v0.2s
> > +  ret <2 x double> %vcvt1.i
> > +; CHECK: ret
> > +}
> > +
> > +define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind
> readnone ssp {
> > +; CHECK-LABEL: test_vcvt_high_f64_f32:
> > +  %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32>
> <i32 2, i32 3>
> > +  %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
> > +; CHECK: fcvtl2        v0.2d, v0.4s
> > +  ret <2 x double> %vcvt1.i
> > +; CHECK: ret
> > +}
> > +
> > +define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind
> readnone ssp {
> > +; CHECK-LABEL: test_vcvt_f32_f64:
> > +  %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
> > +; CHECK: fcvtn
> > +  ret <2 x float> %vcvt1.i
> > +; CHECK: ret
> > +}
> > +
> > +define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double>
> %v) nounwind readnone ssp {
> > +; CHECK-LABEL: test_vcvt_high_f32_f64:
> > +
> > +  %cvt = fptrunc <2 x double> %v to <2 x float>
> > +  %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> > +; CHECK: fcvtn2
> > +  ret <4 x float> %vcvt2.i
> > +; CHECK: ret
> > +}
> > +
> > +define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind
> readnone ssp {
> > +; CHECK-LABEL: test_vcvtx_f32_f64:
> > +  %vcvtx1.i = tail call <2 x float>
> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
> > +; CHECK: fcvtxn
> > +  ret <2 x float> %vcvtx1.i
> > +; CHECK: ret
> > +}
> > +
> > +define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x
> double> %v) nounwind readnone ssp {
> > +; CHECK-LABEL: test_vcvtx_high_f32_f64:
> > +  %vcvtx2.i = tail call <2 x float>
> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
> > +  %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> > +; CHECK: fcvtxn2
> > +  ret <4 x float> %res
> > +; CHECK: ret
> > +}
> > +
> > +
> > +declare <2 x double> @llvm.arm64.neon.vcvthighfp2df(<4 x float>)
> nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.vcvtfp2df(<2 x float>) nounwind
> readnone
> > +
> > +declare <2 x float> @llvm.arm64.neon.vcvtdf2fp(<2 x double>) nounwind
> readnone
> > +declare <4 x float> @llvm.arm64.neon.vcvthighdf2fp(<2 x float>, <2 x
> double>) nounwind readnone
> > +
> > +declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>)
> nounwind readnone
> > +
> > +define i16 @to_half(float %in) {
> > +; CHECK-LABEL: to_half:
> > +; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
> > +; CHECK: fmov w0, s[[HALFVAL]]
> > +
> > +  %res = call i16 @llvm.convert.to.fp16(float %in)
> > +  ret i16 %res
> > +}
> > +
> > +define float @from_half(i16 %in) {
> > +; CHECK-LABEL: from_half:
> > +; CHECK: fmov s[[HALFVAL:[0-9]+]], {{w[0-9]+}}
> > +; CHECK: fcvt s0, h[[HALFVAL]]
> > +  %res = call float @llvm.convert.from.fp16(i16 %in)
> > +  ret float %res
> > +}
> > +
> > +declare float @llvm.convert.from.fp16(i16) #1
> > +declare i16 @llvm.convert.to.fp16(float) #1
> >
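
(Side note on the fp16 tests just above: to_half/from_half go through the generic
llvm.convert.to.fp16 / llvm.convert.from.fp16 intrinsics rather than anything
ARM64-specific. As a minimal sketch only — this function is illustrative and not
part of r205090, and the name is made up — a float->half->float round trip built
from those same two intrinsics would be expected to select exactly the
fcvt h<n>, s0 / fcvt s0, h<n> pair the CHECK lines test for:

  ; illustrative sketch, not part of the patch
  define float @half_roundtrip(float %in) {
    %h = call i16 @llvm.convert.to.fp16(float %in)     ; float -> half bits
    %f = call float @llvm.convert.from.fp16(i16 %h)    ; half bits -> float
    ret float %f
  }

  declare i16 @llvm.convert.to.fp16(float)
  declare float @llvm.convert.from.fp16(i16)
)
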
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvt_f32_su32.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvt_f32_su32.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvt_f32_su32.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvt_f32_su32.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,73 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: ucvt:
> > +; CHECK: ucvtf.2s  v0, v0
> > +; CHECK: ret
> > +
> > +  %vcvt.i = uitofp <2 x i32> %a to <2 x float>
> > +  ret <2 x float> %vcvt.i
> > +}
> > +
> > +define <2 x float> @scvt(<2 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: scvt:
> > +; CHECK: scvtf.2s  v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = sitofp <2 x i32> %a to <2 x float>
> > +  ret <2 x float> %vcvt.i
> > +}
> > +
> > +define <4 x float> @ucvtq(<4 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: ucvtq:
> > +; CHECK: ucvtf.4s  v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = uitofp <4 x i32> %a to <4 x float>
> > +  ret <4 x float> %vcvt.i
> > +}
> > +
> > +define <4 x float> @scvtq(<4 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: scvtq:
> > +; CHECK: scvtf.4s  v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = sitofp <4 x i32> %a to <4 x float>
> > +  ret <4 x float> %vcvt.i
> > +}
> > +
> > +define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtf16:
> > +; CHECK: fcvtl  v0.4s, v0.4h
> > +; CHECK-NEXT: ret
> > +  %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>
> %a) nounwind
> > +  ret <4 x float> %vcvt1.i
> > +}
> > +
> > +define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtf16_high:
> > +; CHECK: fcvtl2  v0.4s, v0.8h
> > +; CHECK-NEXT: ret
> > +  %in = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4,
> i32 5, i32 6, i32 7>
> > +  %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>
> %in) nounwind
> > +  ret <4 x float> %vcvt1.i
> > +}
> > +
> > +
> > +
> > +define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtf16f32:
> > +; CHECK: fcvtn  v0.4h, v0.4s
> > +; CHECK-NEXT: ret
> > +  %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>
> %a) nounwind
> > +  ret <4 x i16> %vcvt1.i
> > +}
> > +
> > +define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big)
> {
> > +; CHECK-LABEL: cvtf16f32_high:
> > +; CHECK: fcvtn2 v0.8h, v1.4s
> > +; CHECK-NEXT: ret
> > +  %high = call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>
> %high_big)
> > +  %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> <i32
> 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> > +  ret <8 x i16> %res
> > +}
> > +
> > +declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) nounwind
> readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvt_n.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvt_n.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvt_n.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvt_n.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,49 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtf32fxpu:
> > +; CHECK: ucvtf.2s      v0, v0, #9
> > +; CHECK: ret
> > +  %vcvt_n1 = tail call <2 x float>
> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9)
> > +  ret <2 x float> %vcvt_n1
> > +}
> > +
> > +define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtf32fxps:
> > +; CHECK: scvtf.2s      v0, v0, #12
> > +; CHECK: ret
> > +  %vcvt_n1 = tail call <2 x float>
> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12)
> > +  ret <2 x float> %vcvt_n1
> > +}
> > +
> > +define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtqf32fxpu:
> > +; CHECK: ucvtf.4s      v0, v0, #18
> > +; CHECK: ret
> > +  %vcvt_n1 = tail call <4 x float>
> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18)
> > +  ret <4 x float> %vcvt_n1
> > +}
> > +
> > +define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp {
> > +; CHECK-LABEL: cvtqf32fxps:
> > +; CHECK: scvtf.4s      v0, v0, #30
> > +; CHECK: ret
> > +  %vcvt_n1 = tail call <4 x float>
> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30)
> > +  ret <4 x float> %vcvt_n1
> > +}
> > +define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp {
> > +  %vcvt_n1 = tail call <2 x double>
> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12)
> > +  ret <2 x double> %vcvt_n1
> > +}
> > +
> > +define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp {
> > +  %vcvt_n1 = tail call <2 x double>
> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9)
> > +  ret <2 x double> %vcvt_n1
> > +}
> > +
> > +declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>,
> i32) nounwind readnone
> > +declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>,
> i32) nounwind readnone
> > +declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>,
> i32) nounwind readnone
> > +declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>,
> i32) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>,
> i32) nounwind readnone
> > +declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>,
> i32) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvt_su32_f32.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvt_su32_f32.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvt_su32_f32.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvt_su32_f32.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,34 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp {
> > +; CHECK: c1
> > +; CHECK: fcvtzs.2s     v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = fptosi <2 x float> %a to <2 x i32>
> > +  ret <2 x i32> %vcvt.i
> > +}
> > +
> > +define <2 x i32> @c2(<2 x float> %a) nounwind readnone ssp {
> > +; CHECK: c2
> > +; CHECK: fcvtzu.2s     v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = fptoui <2 x float> %a to <2 x i32>
> > +  ret <2 x i32> %vcvt.i
> > +}
> > +
> > +define <4 x i32> @c3(<4 x float> %a) nounwind readnone ssp {
> > +; CHECK: c3
> > +; CHECK: fcvtzs.4s     v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = fptosi <4 x float> %a to <4 x i32>
> > +  ret <4 x i32> %vcvt.i
> > +}
> > +
> > +define <4 x i32> @c4(<4 x float> %a) nounwind readnone ssp {
> > +; CHECK: c4
> > +; CHECK: fcvtzu.4s     v0, v0
> > +; CHECK: ret
> > +  %vcvt.i = fptoui <4 x float> %a to <4 x i32>
> > +  ret <4 x i32> %vcvt.i
> > +}
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vcvtxd_f32_f64.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vcvtxd_f32_f64.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vcvtxd_f32_f64.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vcvtxd_f32_f64.ll Sat Mar 29 05:18:08
> 2014
> > @@ -0,0 +1,11 @@
> > +; RUN: llc < %s -march=arm64 | FileCheck %s
> > +
> > +define float @fcvtxn(double %a) {
> > +; CHECK-LABEL: fcvtxn:
> > +; CHECK: fcvtxn s0, d0
> > +; CHECK-NEXT: ret
> > +  %vcvtxd.i = tail call float @llvm.arm64.sisd.fcvtxn(double %a)
> nounwind
> > +  ret float %vcvtxd.i
> > +}
> > +
> > +declare float @llvm.arm64.sisd.fcvtxn(double) nounwind readnone
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vecCmpBr.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vecCmpBr.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vecCmpBr.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vecCmpBr.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,207 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
> > +; ModuleID = 'arm64_vecCmpBr.c'
> > +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
> > +target triple = "arm64-apple-ios3.0.0"
> > +
> > +
> > +define i32 @anyZero64(<4 x i16> %a) #0 {
> > +; CHECK: _anyZero64:
> > +; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: b _bar
> > +entry:
> > +  %0 = bitcast <4 x i16> %a to <8 x i8>
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0)
> #3
> > +  %1 = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %if.then, label %return
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @bar(...) #1
> > +
> > +define i32 @anyZero128(<8 x i16> %a) #0 {
> > +; CHECK: _anyZero128:
> > +; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: b _bar
> > +
> > +entry:
> > +  %0 = bitcast <8 x i16> %a to <16 x i8>
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>
> %0) #3
> > +  %1 = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %if.then, label %return
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @anyNonZero64(<4 x i16> %a) #0 {
> > +; CHECK: _anyNonZero64:
> > +; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: movz w0, #0
> > +
> > +entry:
> > +  %0 = bitcast <4 x i16> %a to <8 x i8>
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0)
> #3
> > +  %1 = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @anyNonZero128(<8 x i16> %a) #0 {
> > +; CHECK: _anyNonZero128:
> > +; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: movz w0, #0
> > +entry:
> > +  %0 = bitcast <8 x i16> %a to <16 x i8>
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>
> %0) #3
> > +  %1 = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @allZero64(<4 x i16> %a) #0 {
> > +; CHECK: _allZero64:
> > +; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: b _bar
> > +entry:
> > +  %0 = bitcast <4 x i16> %a to <8 x i8>
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0)
> #3
> > +  %1 = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %if.then, label %return
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @allZero128(<8 x i16> %a) #0 {
> > +; CHECK: _allZero128:
> > +; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: b _bar
> > +entry:
> > +  %0 = bitcast <8 x i16> %a to <16 x i8>
> > +  %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>
> %0) #3
> > +  %1 = trunc i32 %vmaxv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %if.then, label %return
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @allNonZero64(<4 x i16> %a) #0 {
> > +; CHECK: _allNonZero64:
> > +; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: movz w0, #0
> > +entry:
> > +  %0 = bitcast <4 x i16> %a to <8 x i8>
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0)
> #3
> > +  %1 = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +define i32 @allNonZero128(<8 x i16> %a) #0 {
> > +; CHECK: _allNonZero128:
> > +; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
> > +; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
> > +; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
> > +; CHECK: [[LABEL]]:
> > +; CHECK-NEXT: movz w0, #0
> > +entry:
> > +  %0 = bitcast <8 x i16> %a to <16 x i8>
> > +  %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>
> %0) #3
> > +  %1 = trunc i32 %vminv.i to i8
> > +  %tobool = icmp eq i8 %1, 0
> > +  br i1 %tobool, label %return, label %if.then
> > +
> > +if.then:                                          ; preds = %entry
> > +  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
> > +  br label %return
> > +
> > +return:                                           ; preds = %entry,
> %if.then
> > +  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
> > +  ret i32 %retval.0
> > +}
> > +
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) #2
> > +
> > +declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) #2
> > +
> > +declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) #2
> > +
> > +declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) #2
> > +
> > +attributes #0 = { nounwind ssp "target-cpu"="cyclone" }
> > +attributes #1 = { "target-cpu"="cyclone" }
> > +attributes #2 = { nounwind readnone }
> > +attributes #3 = { nounwind }
> > +attributes #4 = { nobuiltin nounwind }
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vecFold.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vecFold.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vecFold.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vecFold.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,145 @@
> > +; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s| FileCheck %s
> > +
> > +define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind
> readnone ssp {
> > +; CHECK-LABEL: foov16i8:
> > +  %vshrn_low_shift = lshr <8 x i16> %a0, <i16 5, i16 5, i16 5, i16 5,
> i16 5, i16 5, i16 5, i16 5>
> > +  %vshrn_low = trunc <8 x i16> %vshrn_low_shift to <8 x i8>
> > +  %vshrn_high_shift = lshr <8 x i16> %b0, <i16 5, i16 5, i16 5, i16 5,
> i16 5, i16 5, i16 5, i16 5>
> > +  %vshrn_high = trunc <8 x i16> %vshrn_high_shift to <8 x i8>
> > +; CHECK: shrn.8b v0, v0, #5
> > +; CHECK-NEXT: shrn2.16b v0, v1, #5
> > +; CHECK-NEXT: ret
> > +  %1 = bitcast <8 x i8> %vshrn_low to <1 x i64>
> > +  %2 = bitcast <8 x i8> %vshrn_high to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
> > +  ret <16 x i8> %3
> > +}
> > +
> > +define <8 x i16> @foov8i16(<4 x i32> %a0, <4 x i32> %b0) nounwind
> readnone ssp {
> > +; CHECK-LABEL: foov8i16:
> > +  %vshrn_low_shift = lshr <4 x i32> %a0, <i32 5, i32 5, i32 5, i32 5>
> > +  %vshrn_low = trunc <4 x i32> %vshrn_low_shift to <4 x i16>
> > +  %vshrn_high_shift = lshr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
> > +  %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
> > +; CHECK: shrn.4h v0, v0, #5
> > +; CHECK-NEXT: shrn2.8h v0, v1, #5
> > +; CHECK-NEXT: ret
> > +  %1 = bitcast <4 x i16> %vshrn_low to <1 x i64>
> > +  %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +define <4 x i32> @foov4i32(<2 x i64> %a0, <2 x i64> %b0) nounwind
> readnone ssp {
> > +; CHECK-LABEL: foov4i32:
> > +  %vshrn_low_shift = lshr <2 x i64> %a0, <i64 5, i64 5>
> > +  %vshrn_low = trunc <2 x i64> %vshrn_low_shift to <2 x i32>
> > +  %vshrn_high_shift = lshr <2 x i64> %b0, <i64 5, i64 5>
> > +  %vshrn_high = trunc <2 x i64> %vshrn_high_shift to <2 x i32>
> > +; CHECK: shrn.2s v0, v0, #5
> > +; CHECK-NEXT: shrn2.4s v0, v1, #5
> > +; CHECK-NEXT: ret
> > +  %1 = bitcast <2 x i32> %vshrn_low to <1 x i64>
> > +  %2 = bitcast <2 x i32> %vshrn_high to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
> > +  ret <4 x i32> %3
> > +}
> > +
> > +define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x
> i32> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: bar:
> > +  %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x
> i32> %a0, <4 x i32> %a1) nounwind
> > +  %vaddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x
> i32> %b0, <4 x i32> %b1) nounwind
> > +; CHECK: addhn.4h      v0, v0, v1
> > +; CHECK-NEXT: addhn2.8h        v0, v2, v3
> > +; CHECK-NEXT: ret
> > +  %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
> > +  %2 = bitcast <4 x i16> %vaddhn2.i10 to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +define <8 x i16> @baz(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x
> i32> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: baz:
> > +  %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x
> i32> %a0, <4 x i32> %a1) nounwind
> > +  %vshrn_high_shift = ashr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
> > +  %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
> > +; CHECK: addhn.4h      v0, v0, v1
> > +; CHECK-NEXT: shrn2.8h v0, v2, #5
> > +; CHECK-NEXT: ret
> > +  %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
> > +  %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +define <8 x i16> @raddhn(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0,
> <4 x i32> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: raddhn:
> > +entry:
> > +; CHECK:       raddhn.4h       v0, v0, v1
> > +; CHECK-NEXT:  raddhn2.8h      v0, v2, v3
> > +; CHECK-NEXT:  ret
> > +  %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x
> i32> %a0, <4 x i32> %a1) nounwind
> > +  %vraddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4
> x i32> %b0, <4 x i32> %b1) nounwind
> > +  %0 = bitcast <4 x i16> %vraddhn2.i to <1 x i64>
> > +  %1 = bitcast <4 x i16> %vraddhn2.i10 to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32
> 0, i32 1>
> > +  %2 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %2
> > +}
> > +
> > +define <8 x i16> @vrshrn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0,
> <8 x i16> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: vrshrn:
> > +; CHECK: rshrn.8b      v0, v0, #5
> > +; CHECK-NEXT: rshrn2.16b       v0, v2, #6
> > +; CHECK-NEXT: ret
> > +  %vrshrn_n1 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>
> %a0, i32 5)
> > +  %vrshrn_n4 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>
> %b0, i32 6)
> > +  %1 = bitcast <8 x i8> %vrshrn_n1 to <1 x i64>
> > +  %2 = bitcast <8 x i8> %vrshrn_n4 to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0,
> <8 x i16> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: vrsubhn:
> > +; CHECK: rsubhn.8b     v0, v0, v1
> > +; CHECK: rsubhn2.16b   v0, v2, v3
> > +; CHECK-NEXT:  ret
> > +  %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x
> i16> %a0, <8 x i16> %a1) nounwind
> > +  %vrsubhn2.i10 = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x
> i16> %b0, <8 x i16> %b1) nounwind
> > +  %1 = bitcast <8 x i8> %vrsubhn2.i to <1 x i64>
> > +  %2 = bitcast <8 x i8> %vrsubhn2.i10 to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0,
> <4 x i32> %b1) nounwind readnone ssp {
> > +; CHECK-LABEL: noOpt1:
> > +  %vqsub2.i = tail call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x
> i32> %a0, <2 x i32> %a1) nounwind
> > +  %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x
> i32> %b0, <4 x i32> %b1) nounwind
> > +; CHECK:       sqsub.2s        v0, v0, v1
> > +; CHECK-NEXT:  addhn2.8h       v0, v2, v3
> > +  %1 = bitcast <2 x i32> %vqsub2.i to <1 x i64>
> > +  %2 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
> > +  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32
> 0, i32 1>
> > +  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
> > +  ret <8 x i16> %3
> > +}
> > +
> > +declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>)
> nounwind readnone
> > +
> > +declare <8 x i8> @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind
> readnone
> > +declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind
> readnone
> > +declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
> nounwind readnone
> > +declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind
> readnone
> > +declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
> nounwind readnone
> > +
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vector-ext.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vector-ext.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vector-ext.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vector-ext.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,16 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +;CHECK: @func30
> > +;CHECK: ushll.4s  v0, v0, #0
> > +;CHECK: movi.4s v1, #1
> > +;CHECK: and.16b v0, v0, v1
> > +;CHECK: str  q0, [x0]
> > +;CHECK: ret
> > +
> > +%T0_30 = type <4 x i1>
> > +%T1_30 = type <4 x i32>
> > +define void @func30(%T0_30 %v0, %T1_30* %p1) {
> > +  %r = zext %T0_30 %v0 to %T1_30
> > +  store %T1_30 %r, %T1_30* %p1
> > +  ret void
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vector-imm.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vector-imm.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vector-imm.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vector-imm.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,134 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
> > +
> > +define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
> > +; CHECK-LABEL: v_orrimm:
> > +; CHECK-NOT: mov
> > +; CHECK-NOT: mvn
> > +; CHECK: orr
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0,
> i8 0, i8 1>
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
> > +; CHECK: v_orrimmQ
> > +; CHECK-NOT: mov
> > +; CHECK-NOT: mvn
> > +; CHECK: orr
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0,
> i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
> > +; CHECK-LABEL: v_bicimm:
> > +; CHECK-NOT: mov
> > +; CHECK-NOT: mvn
> > +; CHECK: bic
> > +       %tmp1 = load <8 x i8>* %A
> > +       %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1,
> i8 -1, i8 -1, i8 0 >
> > +       ret <8 x i8> %tmp3
> > +}
> > +
> > +define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
> > +; CHECK-LABEL: v_bicimmQ:
> > +; CHECK-NOT: mov
> > +; CHECK-NOT: mvn
> > +; CHECK: bic
> > +       %tmp1 = load <16 x i8>* %A
> > +       %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1,
> i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
> > +       ret <16 x i8> %tmp3
> > +}
> > +
> > +define <2 x double> @foo(<2 x double> %bar) nounwind {
> > +; CHECK: foo
> > +; CHECK: fmov.2d       v1, #1.000000e+00
> > +  %add = fadd <2 x double> %bar, <double 1.0, double 1.0>
> > +  ret <2 x double> %add
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t1() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t1:
> > +; CHECK: movi.4s v0, #75
> > +  ret <4 x i32> <i32 75, i32 75, i32 75, i32 75>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t2() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t2:
> > +; CHECK: movi.4s v0, #75, lsl #8
> > +  ret <4 x i32> <i32 19200, i32 19200, i32 19200, i32 19200>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t3() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t3:
> > +; CHECK: movi.4s v0, #75, lsl #16
> > +  ret <4 x i32> <i32 4915200, i32 4915200, i32 4915200, i32 4915200>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t4() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t4:
> > +; CHECK: movi.4s v0, #75, lsl #24
> > +  ret <4 x i32> <i32 1258291200, i32 1258291200, i32 1258291200, i32
> 1258291200>
> > +}
> > +
> > +define <8 x i16> @movi_8h_imm_t5() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_8h_imm_t5:
> > +; CHECK: movi.8h v0, #75
> > +  ret <8 x i16> <i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16
> 75, i16 75>
> > +}
> > +
> > +; rdar://11989841
> > +define <8 x i16> @movi_8h_imm_t6() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_8h_imm_t6:
> > +; CHECK: movi.8h v0, #75, lsl #8
> > +  ret <8 x i16> <i16 19200, i16 19200, i16 19200, i16 19200, i16 19200,
> i16 19200, i16 19200, i16 19200>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t7() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t7:
> > +; CHECK: movi.4s v0, #75, msl #8
> > +ret <4 x i32> <i32 19455, i32 19455, i32 19455, i32 19455>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t8() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t8:
> > +; CHECK: movi.4s v0, #75, msl #16
> > +ret <4 x i32> <i32 4980735, i32 4980735, i32 4980735, i32 4980735>
> > +}
> > +
> > +define <16 x i8> @movi_16b_imm_t9() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_16b_imm_t9:
> > +; CHECK: movi.16b v0, #75
> > +ret <16 x i8> <i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75,
> > +               i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75>
> > +}
> > +
> > +define <2 x i64> @movi_2d_imm_t10() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_2d_imm_t10:
> > +; CHECK: movi.2d v0, #0xff00ff00ff00ff
> > +ret <2 x i64> <i64 71777214294589695, i64 71777214294589695>
> > +}
> > +
> > +define <4 x i32> @movi_4s_imm_t11() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_4s_imm_t11:
> > +; CHECK: fmov.4s v0, #-3.281250e-01
> > +ret <4 x i32> <i32 3198681088, i32 3198681088, i32 3198681088, i32
> 3198681088>
> > +}
> > +
> > +define <2 x i64> @movi_2d_imm_t12() nounwind readnone ssp {
> > +entry:
> > +; CHECK-LABEL: movi_2d_imm_t12:
> > +; CHECK: fmov.2d v0, #-1.718750e-01
> > +ret <2 x i64> <i64 13818732506632945664, i64 13818732506632945664>
> > +}
> >
> > Added: llvm/trunk/test/CodeGen/ARM64/vector-ldst.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/vector-ldst.ll?rev=205090&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM64/vector-ldst.ll (added)
> > +++ llvm/trunk/test/CodeGen/ARM64/vector-ldst.ll Sat Mar 29 05:18:08 2014
> > @@ -0,0 +1,601 @@
> > +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
> -verify-machineinstrs | FileCheck %s
> > +
> > +; rdar://9428579
> > +
> > +%type1 = type { <16 x i8> }
> > +%type2 = type { <8 x i8> }
> > +%type3 = type { <4 x i16> }
> > +
> > +
> > +define hidden fastcc void @t1(%type1** %argtable) nounwind {
> > +entry:
> > +; CHECK-LABEL: t1:
> > +; CHECK: ldr x[[REG:[0-9]+]], [x0]
> > +; CHECK: str q0, [x[[REG]]]
> > +  %tmp1 = load %type1** %argtable, align 8
> > +  %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
> > +  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
> > +  ret void
> > +}
> > +
> > +define hidden fastcc void @t2(%type2** %argtable) nounwind {
> > +entry:
> > +; CHECK-LABEL: t2:
> > +; CHECK: ldr x[[REG:[0-9]+]], [x0]
> > +; CHECK: str d0, [x[[REG]]]
> > +  %tmp1 = load %type2** %argtable, align 8
> > +  %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
> > +  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
> > +  ret void
> > +}
> > +
> > +; add a bunch of tests for rdar://11246289
> > +
> > +@globalArray64x2 = common global <2 x i64>* null, align 8
> > +@globalArray32x4 = common global <4 x i32>* null, align 8
> > +@globalArray16x8 = common global <8 x i16>* null, align 8
> > +@globalArray8x16 = common global <16 x i8>* null, align 8
> > +@globalArray64x1 = common global <1 x i64>* null, align 8
> > +@globalArray32x2 = common global <2 x i32>* null, align 8
> > +@globalArray16x4 = common global <4 x i16>* null, align 8
> > +@globalArray8x8 = common global <8 x i8>* null, align 8
> > +@floatglobalArray64x2 = common global <2 x double>* null, align 8
> > +@floatglobalArray32x4 = common global <4 x float>* null, align 8
> > +@floatglobalArray64x1 = common global <1 x double>* null, align 8
> > +@floatglobalArray32x2 = common global <2 x float>* null, align 8
> > +
> > +define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_64x2:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
> > +  %tmp = load <2 x i64>* %arrayidx, align 16
> > +  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
> > +  %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
> > +  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_64x2:
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
> > +  %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
> > +  %tmp = load <2 x i64>* %arrayidx, align 16
> > +  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
> > +  %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
> > +  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_32x4:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
> > +  %tmp = load <4 x i32>* %arrayidx, align 16
> > +  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
> > +  %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
> > +  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_32x4:
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
> > +  %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
> > +  %tmp = load <4 x i32>* %arrayidx, align 16
> > +  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
> > +  %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
> > +  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_16x8:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
> > +  %tmp = load <8 x i16>* %arrayidx, align 16
> > +  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
> > +  %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
> > +  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_16x8:
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
> > +  %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
> > +  %tmp = load <8 x i16>* %arrayidx, align 16
> > +  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
> > +  %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
> > +  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_8x16:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
> > +  %tmp = load <16 x i8>* %arrayidx, align 16
> > +  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
> > +  %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
> > +  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_8x16:
> > +; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
> > +  %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
> > +  %tmp = load <16 x i8>* %arrayidx, align 16
> > +  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
> > +  %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
> > +  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
> > +  ret void
> > +}
> > +
> > +define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_64x1:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
> > +  %tmp = load <1 x i64>* %arrayidx, align 8
> > +  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
> > +  %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
> > +  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_64x1:
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
> > +  %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
> > +  %tmp = load <1 x i64>* %arrayidx, align 8
> > +  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
> > +  %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
> > +  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_32x2:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
> > +  %tmp = load <2 x i32>* %arrayidx, align 8
> > +  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
> > +  %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
> > +  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_32x2:
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
> > +  %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
> > +  %tmp = load <2 x i32>* %arrayidx, align 8
> > +  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
> > +  %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
> > +  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset)
> nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_16x4:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
> > +  %tmp = load <4 x i16>* %arrayidx, align 8
> > +  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
> > +  %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
> > +  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct2_16x4:
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
> > +  %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
> > +  %tmp = load <4 x i16>* %arrayidx, align 8
> > +  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
> > +  %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
> > +  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind
> ssp {
> > +entry:
> > +; CHECK-LABEL: fct1_8x8:
> > +; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
> > +; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
> > +; CHECK: ldr [[BASE:x[0-9]+]],
> > +; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
> > +  %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
> > +  %tmp = load <8 x i8>* %arrayidx, align 8
> > +  %tmp1 = load <8 x i8>** @globalArray8x8, align 8
> > +  %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
> > +  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
> > +  ret void
> > +}
> > +
> > +; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
> > +; registers for unscaled vector accesses
> > +@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for
> D and Q registers\00", align 1
> > +
> > +define <1 x i64> @fct0() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct0:
> > +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <1 x i64>*), align 8
> > +  ret <1 x i64> %0
> > +}
> > +
> > +define <2 x i32> @fct1() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct1:
> > +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <2 x i32>*), align 8
> > +  ret <2 x i32> %0
> > +}
> > +
> > +define <4 x i16> @fct2() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct2:
> > +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <4 x i16>*), align 8
> > +  ret <4 x i16> %0
> > +}
> > +
> > +define <8 x i8> @fct3() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct3:
> > +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <8 x i8>*), align 8
> > +  ret <8 x i8> %0
> > +}
> > +
> > +define <2 x i64> @fct4() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct4:
> > +; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <2 x i64>*), align 16
> > +  ret <2 x i64> %0
> > +}
> > +
> > +define <4 x i32> @fct5() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct5:
> > +; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <4 x i32>*), align 16
> > +  ret <4 x i32> %0
> > +}
> > +
> > +define <8 x i16> @fct6() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct6:
> > +; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <8 x i16>*), align 16
> > +  ret <8 x i16> %0
> > +}
> > +
> > +define <16 x i8> @fct7() nounwind readonly ssp {
> > +entry:
> > +; CHECK-LABEL: fct7:
> > +; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
> > +  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <16 x i8>*), align 16
> > +  ret <16 x i8> %0
> > +}
> > +
> > +define void @fct8() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct8:
> > +; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <1 x i64>*), align 8
> > +  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
> > +  ret void
> > +}
> > +
> > +define void @fct9() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct9:
> > +; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <2 x i32>*), align 8
> > +  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
> > +  ret void
> > +}
> > +
> > +define void @fct10() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct10:
> > +; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <4 x i16>*), align 8
> > +  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
> > +  ret void
> > +}
> > +
> > +define void @fct11() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct11:
> > +; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <8 x i8>*), align 8
> > +  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63
> x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
> > +  ret void
> > +}
> > +
> > +define void @fct12() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct12:
> > +; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <2 x i64>*), align 16
> > +  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
> > +  ret void
> > +}
> > +
> > +define void @fct13() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct13:
> > +; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <4 x i32>*), align 16
> > +  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
> > +  ret void
> > +}
> > +
> > +define void @fct14() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct14:
> > +; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <8 x i16>*), align 16
> > +  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
> > +  ret void
> > +}
> > +
> > +define void @fct15() nounwind ssp {
> > +entry:
> > +; CHECK-LABEL: fct15:
> > +; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
> > +; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
> > +  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]*
> @str, i64 0, i64 3) to <16 x i8>*), align 16
> > +  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds
> ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
> > +  ret void
> > +}
> > +
> > +; Check the building of vector from a single loaded value.
> > +; Part of <rdar://problem/14170854>
> > +;
> > +; Single loads with immediate offset.
> > +define <8 x i8> @fct16(i8* nocapture %sp0) {
> > +; CHECK-LABEL: fct16:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <8 x i8> %vec, %vec
> > +  ret <8 x i8> %vmull.i
> > +}
> > +
> > +define <16 x i8> @fct17(i8* nocapture %sp0) {
> > +; CHECK-LABEL: fct17:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
> > +; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <16 x i8> %vec, %vec
> > +  ret <16 x i8> %vmull.i
> > +}
> > +
> > +define <4 x i16> @fct18(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct18:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <4 x i16> %vec, %vec
> > +  ret <4 x i16> %vmull.i
> > +}
> > +
> > +define <8 x i16> @fct19(i16* nocapture %sp0) {
> > +; CHECK-LABEL: fct19:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
> > +; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <8 x i16> %vec, %vec
> > +  ret <8 x i16> %vmull.i
> > +}
> > +
> > +define <2 x i32> @fct20(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct20:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <2 x i32> %vec, %vec
> > +  ret <2 x i32> %vmull.i
> > +}
> > +
> > +define <4 x i32> @fct21(i32* nocapture %sp0) {
> > +; CHECK-LABEL: fct21:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
> > +; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <4 x i32> %vec, %vec
> > +  ret <4 x i32> %vmull.i
> > +}
> > +
> > +define <1 x i64> @fct22(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct22:
> > +; CHECK: ldr d0, [x0, #8]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
> > +   ret <1 x i64> %vec
> > +}
> > +
> > +define <2 x i64> @fct23(i64* nocapture %sp0) {
> > +; CHECK-LABEL: fct23:
> > +; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 1
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
> > +  ret <2 x i64> %vec
> > +}
> > +
> > +;
> > +; Single loads with register offset.
> > +define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct24:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <8 x i8> %vec, %vec
> > +  ret <8 x i8> %vmull.i
> > +}
> > +
> > +define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct25:
> > +; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
> > +; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i8* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i8* %addr, align 1
> > +  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <16 x i8> %vec, %vec
> > +  ret <16 x i8> %vmull.i
> > +}
> > +
> > +define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct26:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <4 x i16> %vec, %vec
> > +  ret <4 x i16> %vmull.i
> > +}
> > +
> > +define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct27:
> > +; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
> > +; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i16* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i16* %addr, align 1
> > +  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <8 x i16> %vec, %vec
> > +  ret <8 x i16> %vmull.i
> > +}
> > +
> > +define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct28:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <2 x i32> %vec, %vec
> > +  ret <2 x i32> %vmull.i
> > +}
> > +
> > +define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct29:
> > +; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
> > +; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
> > +entry:
> > +  %addr = getelementptr i32* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i32* %addr, align 1
> > +  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
> > +  %vmull.i = mul <4 x i32> %vec, %vec
> > +  ret <4 x i32> %vmull.i
> > +}
> > +
> > +define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct30:
> > +; CHECK: ldr d0, [x0, x1, lsl #3]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
> > +   ret <1 x i64> %vec
> > +}
> > +
> > +define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
> > +; CHECK-LABEL: fct31:
> > +; CHECK: ldr d0, [x0, x1, lsl #3]
> > +entry:
> > +  %addr = getelementptr i64* %sp0, i64 %offset
> > +  %pix_sp0.0.copyload = load i64* %addr, align 1
> > +  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
> > +  ret <2 x i64> %vec
> > +}
> >
> >



-- 
Alexey Samsonov, MSK