[llvm] r256004 - [AArch64] Promote loads from stores

Charlie Turner via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 21 07:11:33 PST 2015


Hi Jun,
This commit causes miscompares in spec2000's gcc benchmark. Do you run
this in ref mode? In particular, the 200.s and scilab.s outputs miscompare.

--Charlie.

On 18 December 2015 at 18:08, Jun Bum Lim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: junbuml
> Date: Fri Dec 18 12:08:30 2015
> New Revision: 256004
>
> URL: http://llvm.org/viewvc/llvm-project?rev=256004&view=rev
> Log:
> [AArch64] Promote loads from stores
>
> This change promotes load instructions which directly read from stores by
> replacing them with mov instructions. If the store is wider than the load,
> the load will be replaced with a bitfield extract.
> For example:
>   STRWui %W1, %X0, 1
>   %W0 = LDRHHui %X0, 3
> becomes
>   STRWui %W1, %X0, 1
>   %W0 = UBFMWri %W1, 16, 31
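As a rough illustration of the pattern (a hypothetical C++ reduction, not
taken from the commit; the exact instruction selection depends on compiler
version, target endianness and optimization level), source along these
lines can produce the store/narrow-load pair shown above on little-endian
AArch64:

    #include <cstdint>
    #include <cstring>

    // Store a 32-bit value at p[1] (byte offset 4), then reload the upper
    // 16 bits of that word (byte offset 6) from memory.
    std::uint16_t store_then_narrow_load(std::uint32_t *p, std::uint32_t v) {
      p[1] = v;                                    // may select to STRWui %W1, %X0, 1
      std::uint16_t hi;
      std::memcpy(&hi, reinterpret_cast<const char *>(p) + 6, sizeof(hi));
      return hi;                                   // LDRHHui -> UBFMWri (lsr #16)
    }

With the patch, the narrow reload is rewritten into a bitfield extract of
the register that was just stored, avoiding the round trip through memory.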
>
> Added:
>     llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
> Modified:
>     llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
>     llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
>     llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri Dec 18 12:08:30 2015
> @@ -43,6 +43,7 @@ STATISTIC(NumUnscaledPairCreated,
>            "Number of load/store from unscaled generated");
>  STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
>  STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
> +STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
>
>  static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
>                                     cl::init(20), cl::Hidden);
> @@ -93,6 +94,12 @@ struct AArch64LoadStoreOpt : public Mach
>    MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
>                                                 LdStPairFlags &Flags,
>                                                 unsigned Limit);
> +
> +  // Scan the instructions looking for a store that writes to the address from
> +  // which the current load instruction reads. Return true if one is found.
> +  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
> +                         MachineBasicBlock::iterator &StoreI);
> +
>    // Merge the two instructions indicated into a single pair-wise instruction.
>    // If MergeForward is true, erase the first instruction and fold its
>    // operation into the second. If false, the reverse. Return the instruction
> @@ -102,6 +109,11 @@ struct AArch64LoadStoreOpt : public Mach
>                     MachineBasicBlock::iterator Paired,
>                     const LdStPairFlags &Flags);
>
> +  // Promote the load that reads directly from the address stored to.
> +  MachineBasicBlock::iterator
> +  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
> +                       MachineBasicBlock::iterator StoreI);
> +
>    // Scan the instruction list to find a base register update that can
>    // be combined with the current instruction (a load or store) using
>    // pre or post indexed addressing with writeback. Scan forwards.
> @@ -128,6 +140,9 @@ struct AArch64LoadStoreOpt : public Mach
>    // Find and merge foldable ldr/str instructions.
>    bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
>
> +  // Find and promote load instructions which read directly from a store.
> +  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
> +
>    // Check if converting two narrow loads into a single wider load with
>    // bitfield extracts could be enabled.
>    bool enableNarrowLdMerge(MachineFunction &Fn);
> @@ -399,6 +414,36 @@ static unsigned getMatchingPairOpcode(un
>    }
>  }
>
> +static unsigned isMatchingStore(MachineInstr *LoadInst,
> +                                MachineInstr *StoreInst) {
> +  unsigned LdOpc = LoadInst->getOpcode();
> +  unsigned StOpc = StoreInst->getOpcode();
> +  switch (LdOpc) {
> +  default:
> +    llvm_unreachable("Unsupported load instruction!");
> +  case AArch64::LDRBBui:
> +    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
> +           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
> +  case AArch64::LDURBBi:
> +    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
> +           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
> +  case AArch64::LDRHHui:
> +    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
> +           StOpc == AArch64::STRXui;
> +  case AArch64::LDURHHi:
> +    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
> +           StOpc == AArch64::STURXi;
> +  case AArch64::LDRWui:
> +    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
> +  case AArch64::LDURWi:
> +    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
> +  case AArch64::LDRXui:
> +    return StOpc == AArch64::STRXui;
> +  case AArch64::LDURXi:
> +    return StOpc == AArch64::STURXi;
> +  }
> +}
> +
>  static unsigned getPreIndexedOpcode(unsigned Opc) {
>    switch (Opc) {
>    default:
> @@ -553,6 +598,21 @@ static const MachineOperand &getLdStOffs
>    return MI->getOperand(Idx);
>  }
>
> +static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
> +                                  MachineInstr *StoreInst) {
> +  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
> +  int LoadSize = getMemScale(LoadInst);
> +  int StoreSize = getMemScale(StoreInst);
> +  int UnscaledStOffset = isUnscaledLdSt(StoreInst)
> +                             ? getLdStOffsetOp(StoreInst).getImm()
> +                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
> +  int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
> +                             ? getLdStOffsetOp(LoadInst).getImm()
> +                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
> +  return (UnscaledStOffset <= UnscaledLdOffset) &&
> +         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
> +}
> +
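Plugging the log-message example into isLdOffsetInRangeOfSt (a worked
calculation, not part of the patch): both instructions are scaled, so each
immediate is multiplied by its access size:

    UnscaledStOffset = 1 * 4 = 4    (STRWui %W1, %X0, 1)
    UnscaledLdOffset = 3 * 2 = 6    (%W0 = LDRHHui %X0, 3)
    4 <= 6  and  6 + 2 <= 4 + 4     -> loaded bytes lie inside the stored bytes

so the load is a candidate for promotion.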
>  // Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
>  static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
>                                     MachineInstr *Op1) {
> @@ -800,6 +860,106 @@ AArch64LoadStoreOpt::mergePairedInsns(Ma
>    return NextI;
>  }
>
> +MachineBasicBlock::iterator
> +AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
> +                                          MachineBasicBlock::iterator StoreI) {
> +  MachineBasicBlock::iterator NextI = LoadI;
> +  ++NextI;
> +
> +  int LoadSize = getMemScale(LoadI);
> +  int StoreSize = getMemScale(StoreI);
> +  unsigned LdRt = getLdStRegOp(LoadI).getReg();
> +  unsigned StRt = getLdStRegOp(StoreI).getReg();
> +  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
> +
> +  assert((IsStoreXReg ||
> +          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
> +         "Unexpected RegClass");
> +
> +  MachineInstr *BitExtMI;
> +  if (LoadSize == StoreSize) {
> +    // Remove the load if the destination register of the load is the same as
> +    // the register holding the stored value.
> +    if (StRt == LdRt) {
> +      DEBUG(dbgs() << "Remove load instruction:\n    ");
> +      DEBUG(LoadI->print(dbgs()));
> +      DEBUG(dbgs() << "\n");
> +      LoadI->eraseFromParent();
> +      return NextI;
> +    }
> +    // Replace the load with a mov if the load and store are the same size.
> +    BitExtMI =
> +        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> +                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
> +            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
> +            .addReg(StRt)
> +            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
> +  } else {
> +    // FIXME: Currently we disable this transformation in big-endian targets as
> +    // performance and correctness are verified only in little-endian.
> +    if (!Subtarget->isLittleEndian())
> +      return NextI;
> +    bool IsUnscaled = isUnscaledLdSt(LoadI);
> +    assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
> +    assert(LoadSize < StoreSize && "Invalid load size");
> +    int UnscaledLdOffset = IsUnscaled
> +                               ? getLdStOffsetOp(LoadI).getImm()
> +                               : getLdStOffsetOp(LoadI).getImm() * LoadSize;
> +    int UnscaledStOffset = IsUnscaled
> +                               ? getLdStOffsetOp(StoreI).getImm()
> +                               : getLdStOffsetOp(StoreI).getImm() * StoreSize;
> +    int Width = LoadSize * 8;
> +    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
> +    int Imms = Immr + Width - 1;
> +    unsigned DestReg = IsStoreXReg
> +                           ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
> +                                                      &AArch64::GPR64RegClass)
> +                           : LdRt;
> +
> +    assert(((UnscaledLdOffset) >= UnscaledStOffset &&
> +            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
> +           "Invalid offset");
> +
> +    Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
> +    Imms = Immr + Width - 1;
> +    if (UnscaledLdOffset == UnscaledStOffset) {
> +      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
> +                                | ((Immr) << 6)               // immr
> +                                | ((Imms) << 0)               // imms
> +          ;
> +
> +      BitExtMI =
> +          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> +                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
> +                  DestReg)
> +              .addReg(StRt)
> +              .addImm(AndMaskEncoded);
> +    } else {
> +      BitExtMI =
> +          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> +                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
> +                  DestReg)
> +              .addReg(StRt)
> +              .addImm(Immr)
> +              .addImm(Imms);
> +    }
> +  }
> +
> +  DEBUG(dbgs() << "Promoting load by replacing :\n    ");
> +  DEBUG(StoreI->print(dbgs()));
> +  DEBUG(dbgs() << "    ");
> +  DEBUG(LoadI->print(dbgs()));
> +  DEBUG(dbgs() << "  with instructions:\n    ");
> +  DEBUG(StoreI->print(dbgs()));
> +  DEBUG(dbgs() << "    ");
> +  DEBUG((BitExtMI)->print(dbgs()));
> +  DEBUG(dbgs() << "\n");
> +
> +  // Erase the old instructions.
> +  LoadI->eraseFromParent();
> +  return NextI;
> +}
> +
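Carrying the same example through promoteLoadFromStore (again a worked
calculation, not part of the patch):

    Width = LoadSize * 8        = 16
    Immr  = 8 * (6 - 4)         = 16
    Imms  = Immr + Width - 1    = 31
    -> UBFMWri %W1, 16, 31, printed as the alias lsr w2, w1, #16

When the load starts at the same offset as the store, Immr is 0 and Imms is
Width - 1, so the ANDri immediate encodes a mask of the low Width bits,
e.g. and w0, w1, #0xffff in the Str32Ldr16_0 test below.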
>  /// trackRegDefsUses - Remember what registers the specified instruction uses
>  /// and modifies.
>  static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
> @@ -863,6 +1023,60 @@ static bool mayAlias(MachineInstr *MIa,
>    return false;
>  }
>
> +bool AArch64LoadStoreOpt::findMatchingStore(
> +    MachineBasicBlock::iterator I, unsigned Limit,
> +    MachineBasicBlock::iterator &StoreI) {
> +  MachineBasicBlock::iterator E = I->getParent()->begin();
> +  MachineBasicBlock::iterator MBBI = I;
> +  MachineInstr *FirstMI = I;
> +  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
> +
> +  // Track which registers have been modified and used between the first insn
> +  // and the second insn.
> +  BitVector ModifiedRegs, UsedRegs;
> +  ModifiedRegs.resize(TRI->getNumRegs());
> +  UsedRegs.resize(TRI->getNumRegs());
> +
> +  for (unsigned Count = 0; MBBI != E && Count < Limit;) {
> +    --MBBI;
> +    MachineInstr *MI = MBBI;
> +    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
> +    // optimization by changing how far we scan.
> +    if (MI->isDebugValue())
> +      continue;
> +    // Now that we know this is a real instruction, count it.
> +    ++Count;
> +
> +    // If the load instruction reads directly from the address to which the
> +    // store instruction writes and the stored value is not modified, we can
> +    // promote the load. Since we do not handle stores with pre-/post-index,
> +    // it's unnecessary to check if BaseReg is modified by the store itself.
> +    if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
> +        BaseReg == getLdStBaseOp(MI).getReg() &&
> +        isLdOffsetInRangeOfSt(FirstMI, MI) &&
> +        !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
> +      StoreI = MBBI;
> +      return true;
> +    }
> +
> +    if (MI->isCall())
> +      return false;
> +
> +    // Update modified / uses register lists.
> +    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
> +
> +    // Otherwise, if the base register is modified, we have no match, so
> +    // return early.
> +    if (ModifiedRegs[BaseReg])
> +      return false;
> +
> +    // If we encounter a store aliased with the load, return early.
> +    if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
> +      return false;
> +  }
> +  return false;
> +}
> +
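The early exits above are pinned down by the negative tests at the end of
the new test file: StrCallLdr keeps its ldrh because the intervening call
stops the backward scan, StrStrLdr because the second store may alias the
load, the *NotInRangeLdr tests because the loaded bytes are not fully
contained in the stored bytes, and StrVolatileLdr never gets this far
because tryToPromoteLoadFromStore skips loads with ordered memory
references.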
>  /// findMatchingInsn - Scan the instructions looking for a load/store that can
>  /// be combined with the current instruction into a load/store pair.
>  MachineBasicBlock::iterator
> @@ -1263,6 +1477,31 @@ MachineBasicBlock::iterator AArch64LoadS
>    return E;
>  }
>
> +bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
> +    MachineBasicBlock::iterator &MBBI) {
> +  MachineInstr *MI = MBBI;
> +  // If this is a volatile load, don't mess with it.
> +  if (MI->hasOrderedMemoryRef())
> +    return false;
> +
> +  // Make sure this is a reg+imm.
> +  // FIXME: It is possible to extend it to handle reg+reg cases.
> +  if (!getLdStOffsetOp(MI).isImm())
> +    return false;
> +
> +  // Look backward up to ScanLimit instructions.
> +  MachineBasicBlock::iterator StoreI;
> +  if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
> +    ++NumLoadsFromStoresPromoted;
> +    // Promote the load. Keeping the iterator straight is a
> +    // pain, so we let the merge routine tell us what the next instruction
> +    // is after it's done mucking about.
> +    MBBI = promoteLoadFromStore(MBBI, StoreI);
> +    return true;
> +  }
> +  return false;
> +}
> +
>  bool AArch64LoadStoreOpt::tryToMergeLdStInst(
>      MachineBasicBlock::iterator &MBBI) {
>    MachineInstr *MI = MBBI;
> @@ -1307,7 +1546,16 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>                                          bool enableNarrowLdOpt) {
>    bool Modified = false;
>    // Three tranformations to do here:
> -  // 1) Find narrow loads that can be converted into a single wider load
> +  // 1) Find loads that directly read from stores and promote them by
> +  //    replacing with mov instructions. If the store is wider than the load,
> +  //    the load will be replaced with a bitfield extract.
> +  //      e.g.,
> +  //        str w1, [x0, #4]
> +  //        ldrh w2, [x0, #6]
> +  //        ; becomes
> +  //        str w1, [x0, #4]
> +  //        lsr        w2, w1, #16
> +  // 2) Find narrow loads that can be converted into a single wider load
>    //    with bitfield extract instructions.
>    //      e.g.,
>    //        ldrh w0, [x2]
> @@ -1316,14 +1564,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>    //        ldr w0, [x2]
>    //        ubfx w1, w0, #16, #16
>    //        and w0, w0, #ffff
> -  // 2) Find loads and stores that can be merged into a single load or store
> +  // 3) Find loads and stores that can be merged into a single load or store
>    //    pair instruction.
>    //      e.g.,
>    //        ldr x0, [x2]
>    //        ldr x1, [x2, #8]
>    //        ; becomes
>    //        ldp x0, x1, [x2]
> -  // 3) Find base register updates that can be merged into the load or store
> +  // 4) Find base register updates that can be merged into the load or store
>    //    as a base-reg writeback.
>    //      e.g.,
>    //        ldr x0, [x2]
> @@ -1332,6 +1580,35 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>    //        ldr x0, [x2], #4
>
>    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
> +       MBBI != E;) {
> +    MachineInstr *MI = MBBI;
> +    switch (MI->getOpcode()) {
> +    default:
> +      // Just move on to the next instruction.
> +      ++MBBI;
> +      break;
> +    // Scaled instructions.
> +    case AArch64::LDRBBui:
> +    case AArch64::LDRHHui:
> +    case AArch64::LDRWui:
> +    case AArch64::LDRXui:
> +    // Unscaled instructions.
> +    case AArch64::LDURBBi:
> +    case AArch64::LDURHHi:
> +    case AArch64::LDURWi:
> +    case AArch64::LDURXi: {
> +      if (tryToPromoteLoadFromStore(MBBI)) {
> +        Modified = true;
> +        break;
> +      }
> +      ++MBBI;
> +      break;
> +    }
> +      // FIXME: Do the other instructions.
> +    }
> +  }
> +
> +  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
>         enableNarrowLdOpt && MBBI != E;) {
>      MachineInstr *MI = MBBI;
>      switch (MI->getOpcode()) {
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll Fri Dec 18 12:08:30 2015
> @@ -1,9 +1,9 @@
>  ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
>
>  ; CHECK: foo
> -; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
> -; CHECK: str w[[REG]], [x19, #132]
> -; CHECK: ldr w{{[0-9]+}}, [x19, #264]
> +; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
> +; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
> +; CHECK: str w[[REG1]], [x19, #132]
>
>  define i32 @foo(i32 %a) nounwind {
>    %retval = alloca i32, align 4
>
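The CHECK update above reflects the new rewrite: the reload of the stack
slot at [x19, #264] is now matched as a mov from the register that was just
stored to that slot, rather than as a second ldr. The regress-tblgen-chains.ll
change at the end of this patch is updated for the same reason.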
> Added: llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll?rev=256004&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll Fri Dec 18 12:08:30 2015
> @@ -0,0 +1,666 @@
> +; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s
> +
> +; CHECK-LABEL: Str64Ldr64
> +; CHECK: mov x0, x1
> +define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i64*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1
> +  %1 = load i64, i64* %arrayidx1
> +  ret i64 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr32_0
> +; CHECK: and x0, x1, #0xffffffff
> +define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr32_1
> +; CHECK: lsr x0, x1, #32
> +define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_0
> +; CHECK: and x0, x1, #0xffff
> +define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_1
> +; CHECK: ubfx x0, x1, #16, #16
> +define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_2
> +; CHECK: ubfx x0, x1, #32, #16
> +define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_3
> +; CHECK: lsr x0, x1, #48
> +define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_0
> +; CHECK: and x0, x1, #0xff
> +define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_1
> +; CHECK: ubfx x0, x1, #8, #8
> +define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_2
> +; CHECK: ubfx x0, x1, #16, #8
> +define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_3
> +; CHECK: ubfx x0, x1, #24, #8
> +define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_4
> +; CHECK: ubfx x0, x1, #32, #8
> +define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_5
> +; CHECK: ubfx x0, x1, #40, #8
> +define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_6
> +; CHECK: ubfx x0, x1, #48, #8
> +define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_7
> +; CHECK: lsr x0, x1, #56
> +define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr32
> +; CHECK: mov w0, w1
> +define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr16_0
> +; CHECK: and w0, w1, #0xffff
> +define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr16_1
> +; CHECK: lsr   w0, w1, #16
> +define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_2
> +; CHECK: ubfx w0, w1, #16, #8
> +define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_3
> +; CHECK: lsr w0, w1, #24
> +define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr16
> +; CHECK: mov w0, w1
> +define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr64
> +; CHECK: mov x0, x1
> +define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i64*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1
> +  %1 = load i64, i64* %arrayidx1
> +  ret i64 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr32_0
> +; CHECK: and x0, x1, #0xffffffff
> +define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr32_1
> +; CHECK: lsr x0, x1, #32
> +define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_0
> +; CHECK: and x0, x1, #0xffff
> +define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_1
> +; CHECK: ubfx x0, x1, #16, #16
> +define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_2
> +; CHECK: ubfx x0, x1, #32, #16
> +define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_3
> +; CHECK: lsr x0, x1, #48
> +define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_0
> +; CHECK: and x0, x1, #0xff
> +define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_1
> +; CHECK: ubfx x0, x1, #8, #8
> +define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_2
> +; CHECK: ubfx x0, x1, #16, #8
> +define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_3
> +; CHECK: ubfx x0, x1, #24, #8
> +define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_4
> +; CHECK: ubfx x0, x1, #32, #8
> +define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_5
> +; CHECK: ubfx x0, x1, #40, #8
> +define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_6
> +; CHECK: ubfx x0, x1, #48, #8
> +define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_7
> +; CHECK: lsr x0, x1, #56
> +define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i64* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> +  store i64 %v, i64* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr32
> +; CHECK: mov w0, w1
> +define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i32*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
> +  %1 = load i32, i32* %arrayidx1
> +  ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr16_0
> +; CHECK: and w0, w1, #0xffff
> +define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr16_1
> +; CHECK: lsr   w0, w1, #16
> +define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_2
> +; CHECK: ubfx w0, w1, #16, #8
> +define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_3
> +; CHECK: lsr w0, w1, #24
> +define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr16
> +; CHECK: mov w0, w1
> +define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i16* %P to i8*
> +  %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> +  store i16 %v, i16* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> +  %1 = load i8, i8* %arrayidx1
> +  ret i8 %1
> +}
> +
> +; CHECK-LABEL: StrVolatileLdr
> +; CHECK: ldrh
> +define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> +  %1 = load volatile i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: StrNotInRangeLdr
> +; CHECK: ldrh
> +define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_StrNotInRangeLdr
> +; CHECK: ldurh
> +define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> +  store i32 %v, i32* %arrayidx0
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +; CHECK-LABEL: StrCallLdr
> +; CHECK: ldrh
> +define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  %c = call i1 @test_dummy()
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
> +
> +declare i1 @test_dummy()
> +
> +; CHECK-LABEL: StrStrLdr
> +; CHECK: ldrh
> +define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) {
> +entry:
> +  %0 = bitcast i32* %P to i16*
> +  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> +  store i32 %v, i32* %arrayidx0
> +  store i32 %n, i32* %P2
> +  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> +  %1 = load i16, i16* %arrayidx1
> +  ret i16 %1
> +}
>
> Modified: llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll Fri Dec 18 12:08:30 2015
> @@ -27,8 +27,8 @@ define i64 @test_chains() {
>
>  ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
>  ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
> -; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
> -; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
> +; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
> +; CHECK: mov {{w[0-9]+}}, w[[STRVAL]]
>
>    %ret.1 = load i8, i8* %locvar
>    %ret.2 = zext i8 %ret.1 to i64
>
>