[llvm] r256004 - [AArch64] Promote loads from stores
Charlie Turner via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 21 07:11:33 PST 2015
Hi Jun,
This commit causes miscompares in spec2000's gcc benchmark. Do you run
this benchmark in ref mode? In particular, 200.s and scilab.s miscompare.
--Charlie.
On 18 December 2015 at 18:08, Jun Bum Lim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: junbuml
> Date: Fri Dec 18 12:08:30 2015
> New Revision: 256004
>
> URL: http://llvm.org/viewvc/llvm-project?rev=256004&view=rev
> Log:
> [AArch64] Promote loads from stores
>
> This change promotes load instructions which directly read from stores by
> replacing them with mov instructions. If the store is wider than the load,
> the load will be replaced with a bitfield extract.
> For example :
> STRWui %W1, %X0, 1
> %W0 = LDRHHui %X0, 3
> becomes
> STRWui %W1, %X0, 1
> %W0 = UBFMWri %W1, 16, 31
>
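(Aside, for reference rather than part of the patch: a minimal stand-alone
sketch of the offset arithmetic behind the example above, assuming the usual
scaled-immediate interpretation, where STRWui scales its immediate by 4 and
LDRHHui by 2. The store then covers bytes 4..7 and the load bytes 6..7, i.e.
bits 16..31 of the stored W register, which is where the UBFM immediates come
from. All names below are made up for illustration.)

  // Hypothetical sketch: recompute the UBFM immediates for the example in
  // the commit message (str at scaled offset 1, ldrh at scaled offset 3).
  #include <cstdio>
  int main() {
    int StoreSize = 4, LoadSize = 2;      // STRWui writes 4 bytes, LDRHHui reads 2
    int UnscaledStOffset = 1 * StoreSize; // scaled imm 1 -> byte offset 4
    int UnscaledLdOffset = 3 * LoadSize;  // scaled imm 3 -> byte offset 6
    int Width = LoadSize * 8;             // 16 bits
    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); // 16
    int Imms = Immr + Width - 1;                          // 31
    std::printf("UBFMWri immr=%d imms=%d\n", Immr, Imms);
    return 0;
  }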
> Added:
> llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
> llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri Dec 18 12:08:30 2015
> @@ -43,6 +43,7 @@ STATISTIC(NumUnscaledPairCreated,
> "Number of load/store from unscaled generated");
> STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
> STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
> +STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
>
> static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
> cl::init(20), cl::Hidden);
> @@ -93,6 +94,12 @@ struct AArch64LoadStoreOpt : public Mach
> MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
> LdStPairFlags &Flags,
> unsigned Limit);
> +
> + // Scan the instructions looking for a store that writes to the address from
> + // which the current load instruction reads. Return true if one is found.
> + bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
> + MachineBasicBlock::iterator &StoreI);
> +
> // Merge the two instructions indicated into a single pair-wise instruction.
> // If MergeForward is true, erase the first instruction and fold its
> // operation into the second. If false, the reverse. Return the instruction
> @@ -102,6 +109,11 @@ struct AArch64LoadStoreOpt : public Mach
> MachineBasicBlock::iterator Paired,
> const LdStPairFlags &Flags);
>
> + // Promote the load that reads directly from the address stored to.
> + MachineBasicBlock::iterator
> + promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
> + MachineBasicBlock::iterator StoreI);
> +
> // Scan the instruction list to find a base register update that can
> // be combined with the current instruction (a load or store) using
> // pre or post indexed addressing with writeback. Scan forwards.
> @@ -128,6 +140,9 @@ struct AArch64LoadStoreOpt : public Mach
> // Find and merge foldable ldr/str instructions.
> bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
>
> +  // Find and promote load instructions which read directly from a store.
> + bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
> +
> // Check if converting two narrow loads into a single wider load with
> // bitfield extracts could be enabled.
> bool enableNarrowLdMerge(MachineFunction &Fn);
> @@ -399,6 +414,36 @@ static unsigned getMatchingPairOpcode(un
> }
> }
>
> +static unsigned isMatchingStore(MachineInstr *LoadInst,
> + MachineInstr *StoreInst) {
> + unsigned LdOpc = LoadInst->getOpcode();
> + unsigned StOpc = StoreInst->getOpcode();
> + switch (LdOpc) {
> + default:
> + llvm_unreachable("Unsupported load instruction!");
> + case AArch64::LDRBBui:
> + return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
> + StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
> + case AArch64::LDURBBi:
> + return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
> + StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
> + case AArch64::LDRHHui:
> + return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
> + StOpc == AArch64::STRXui;
> + case AArch64::LDURHHi:
> + return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
> + StOpc == AArch64::STURXi;
> + case AArch64::LDRWui:
> + return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
> + case AArch64::LDURWi:
> + return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
> + case AArch64::LDRXui:
> + return StOpc == AArch64::STRXui;
> + case AArch64::LDURXi:
> + return StOpc == AArch64::STURXi;
> + }
> +}
> +
> static unsigned getPreIndexedOpcode(unsigned Opc) {
> switch (Opc) {
> default:
> @@ -553,6 +598,21 @@ static const MachineOperand &getLdStOffs
> return MI->getOperand(Idx);
> }
>
> +static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
> + MachineInstr *StoreInst) {
> + assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
> + int LoadSize = getMemScale(LoadInst);
> + int StoreSize = getMemScale(StoreInst);
> + int UnscaledStOffset = isUnscaledLdSt(StoreInst)
> + ? getLdStOffsetOp(StoreInst).getImm()
> + : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
> + int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
> + ? getLdStOffsetOp(LoadInst).getImm()
> + : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
> + return (UnscaledStOffset <= UnscaledLdOffset) &&
> + (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
> +}
> +
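(Aside, not part of the patch: the check above normalizes both immediates to
byte offsets and requires the loaded bytes to lie entirely inside the stored
bytes. Below is a minimal stand-alone sketch of that containment test, applied
to the in-range case from the commit message and to the out-of-range
StrNotInRangeLdr case in the new test file; the helper name is invented for
illustration.)

  // Hypothetical sketch of the byte-offset containment test.
  #include <cstdio>
  static bool ldInRangeOfSt(int StOff, int StSize, int LdOff, int LdSize) {
    // All values in bytes: the load must read only bytes the store wrote.
    return StOff <= LdOff && LdOff + LdSize <= StOff + StSize;
  }
  int main() {
    // str w1, [x0, #4]; ldrh w0, [x0, #6]  -> promotable
    std::printf("%d\n", ldInRangeOfSt(4, 4, 6, 2)); // prints 1
    // str w1, [x0, #4]; ldrh w0, [x0, #2]  -> not promotable (StrNotInRangeLdr)
    std::printf("%d\n", ldInRangeOfSt(4, 4, 2, 2)); // prints 0
    return 0;
  }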
> // Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
> static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
> MachineInstr *Op1) {
> @@ -800,6 +860,106 @@ AArch64LoadStoreOpt::mergePairedInsns(Ma
> return NextI;
> }
>
> +MachineBasicBlock::iterator
> +AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
> + MachineBasicBlock::iterator StoreI) {
> + MachineBasicBlock::iterator NextI = LoadI;
> + ++NextI;
> +
> + int LoadSize = getMemScale(LoadI);
> + int StoreSize = getMemScale(StoreI);
> + unsigned LdRt = getLdStRegOp(LoadI).getReg();
> + unsigned StRt = getLdStRegOp(StoreI).getReg();
> + bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
> +
> + assert((IsStoreXReg ||
> + TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
> + "Unexpected RegClass");
> +
> + MachineInstr *BitExtMI;
> + if (LoadSize == StoreSize) {
> +    // Remove the load if the destination register of the load is the same
> +    // register as the stored value.
> + if (StRt == LdRt) {
> + DEBUG(dbgs() << "Remove load instruction:\n ");
> + DEBUG(LoadI->print(dbgs()));
> + DEBUG(dbgs() << "\n");
> + LoadI->eraseFromParent();
> + return NextI;
> + }
> +    // Replace the load with a mov if the load and store are the same size.
> + BitExtMI =
> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> + TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
> + .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
> + .addReg(StRt)
> + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
> + } else {
> +    // FIXME: Currently we disable this transformation on big-endian targets, as
> +    // performance and correctness have been verified only on little-endian.
> + if (!Subtarget->isLittleEndian())
> + return NextI;
> + bool IsUnscaled = isUnscaledLdSt(LoadI);
> + assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
> + assert(LoadSize < StoreSize && "Invalid load size");
> + int UnscaledLdOffset = IsUnscaled
> + ? getLdStOffsetOp(LoadI).getImm()
> + : getLdStOffsetOp(LoadI).getImm() * LoadSize;
> + int UnscaledStOffset = IsUnscaled
> + ? getLdStOffsetOp(StoreI).getImm()
> + : getLdStOffsetOp(StoreI).getImm() * StoreSize;
> + int Width = LoadSize * 8;
> + int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
> + int Imms = Immr + Width - 1;
> + unsigned DestReg = IsStoreXReg
> + ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
> + &AArch64::GPR64RegClass)
> + : LdRt;
> +
> + assert(((UnscaledLdOffset) >= UnscaledStOffset &&
> + (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
> + "Invalid offset");
> +
> + if (UnscaledLdOffset == UnscaledStOffset) {
> + uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
> + | ((Immr) << 6) // immr
> + | ((Imms) << 0) // imms
> + ;
> +
> + BitExtMI =
> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> + TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
> + DestReg)
> + .addReg(StRt)
> + .addImm(AndMaskEncoded);
> + } else {
> + BitExtMI =
> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
> + TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
> + DestReg)
> + .addReg(StRt)
> + .addImm(Immr)
> + .addImm(Imms);
> + }
> + }
> +
> + DEBUG(dbgs() << "Promoting load by replacing :\n ");
> + DEBUG(StoreI->print(dbgs()));
> + DEBUG(dbgs() << " ");
> + DEBUG(LoadI->print(dbgs()));
> + DEBUG(dbgs() << " with instructions:\n ");
> + DEBUG(StoreI->print(dbgs()));
> + DEBUG(dbgs() << " ");
> + DEBUG((BitExtMI)->print(dbgs()));
> + DEBUG(dbgs() << "\n");
> +
> + // Erase the old instructions.
> + LoadI->eraseFromParent();
> + return NextI;
> +}
> +
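(Aside, not part of the patch: when the load and store start at the same byte,
the code above emits an AND with a logical immediate rather than a UBFM.
AArch64 logical immediates are encoded as an N:immr:imms triple, and with
immr == 0 and imms == Width - 1 that triple denotes a mask of the low Width
bits, e.g. #0xffff for the Str32Ldr16_0 test below. A minimal stand-alone
sketch of that rotation-0 special case, assuming a contiguous low-bit mask;
the variable choices are illustrative only.)

  // Hypothetical sketch: pack N:immr:imms as the code above does and show
  // the mask it denotes in the rotation-0, low-bits case used here.
  #include <cstdint>
  #include <cstdio>
  int main() {
    bool IsStoreXReg = false;       // W-register AND, as in Str32Ldr16_0
    int Width = 16;                 // halfword load fully covered by a word store
    uint32_t Immr = 0, Imms = Width - 1;
    uint32_t Encoded = ((IsStoreXReg ? 1u : 0u) << 12) | (Immr << 6) | Imms;
    uint64_t Mask = (Width == 64) ? ~0ull : ((1ull << Width) - 1);
    std::printf("encoded=0x%x mask=0x%llx\n", Encoded, (unsigned long long)Mask);
    return 0;
  }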
> /// trackRegDefsUses - Remember what registers the specified instruction uses
> /// and modifies.
> static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
> @@ -863,6 +1023,60 @@ static bool mayAlias(MachineInstr *MIa,
> return false;
> }
>
> +bool AArch64LoadStoreOpt::findMatchingStore(
> + MachineBasicBlock::iterator I, unsigned Limit,
> + MachineBasicBlock::iterator &StoreI) {
> + MachineBasicBlock::iterator E = I->getParent()->begin();
> + MachineBasicBlock::iterator MBBI = I;
> + MachineInstr *FirstMI = I;
> + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
> +
> + // Track which registers have been modified and used between the first insn
> + // and the second insn.
> + BitVector ModifiedRegs, UsedRegs;
> + ModifiedRegs.resize(TRI->getNumRegs());
> + UsedRegs.resize(TRI->getNumRegs());
> +
> + for (unsigned Count = 0; MBBI != E && Count < Limit;) {
> + --MBBI;
> + MachineInstr *MI = MBBI;
> + // Skip DBG_VALUE instructions. Otherwise debug info can affect the
> + // optimization by changing how far we scan.
> + if (MI->isDebugValue())
> + continue;
> + // Now that we know this is a real instruction, count it.
> + ++Count;
> +
> + // If the load instruction reads directly from the address to which the
> + // store instruction writes and the stored value is not modified, we can
> + // promote the load. Since we do not handle stores with pre-/post-index,
> + // it's unnecessary to check if BaseReg is modified by the store itself.
> + if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
> + BaseReg == getLdStBaseOp(MI).getReg() &&
> + isLdOffsetInRangeOfSt(FirstMI, MI) &&
> + !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
> + StoreI = MBBI;
> + return true;
> + }
> +
> + if (MI->isCall())
> + return false;
> +
> + // Update modified / uses register lists.
> + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
> +
> + // Otherwise, if the base register is modified, we have no match, so
> + // return early.
> + if (ModifiedRegs[BaseReg])
> + return false;
> +
> + // If we encounter a store aliased with the load, return early.
> + if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
> + return false;
> + }
> + return false;
> +}
> +
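(Aside, not part of the patch: as far as one can tell from the code above, the
scan walks backwards over at most Limit real instructions and succeeds only on
a store to the same base register that covers the load; it gives up on calls,
on base-register modifications, and on possibly-aliasing stores, which is why
the StrCallLdr and StrStrLdr cases in the new test file are left alone. Below
is a toy, stand-alone sketch of that match/blocker ordering; all types and
names are hypothetical stand-ins, not the pass's real data structures.)

  // Hypothetical sketch of the backward scan's match/blocker ordering.
  #include <cstdio>
  #include <vector>
  struct ToyInsn {
    bool IsStore = false, IsCall = false;
    int BaseReg = -1;              // -1: not a memory op
    bool ClobbersBase = false;     // stand-in for trackRegDefsUses bookkeeping
    bool MayAliasLoad = false;
  };
  // Return the index of a matching store for a load at LoadIdx using LoadBase,
  // or -1 if a blocker is hit or the scan limit is reached.
  static int findStoreFeedingLoad(const std::vector<ToyInsn> &Block, int LoadIdx,
                                  int LoadBase, unsigned Limit) {
    unsigned Count = 0;
    for (int I = LoadIdx - 1; I >= 0 && Count < Limit; --I, ++Count) {
      const ToyInsn &MI = Block[I];
      if (MI.IsStore && MI.BaseReg == LoadBase)
        return I;                  // stand-in for the full matching-store check
      if (MI.IsCall || MI.ClobbersBase)
        return -1;                 // call or base-register clobber: give up
      if (MI.IsStore && MI.MayAliasLoad)
        return -1;                 // possibly-aliasing store: give up
    }
    return -1;
  }
  int main() {
    std::vector<ToyInsn> B(3);
    B[0] = {true, false, 0};                // str w1, [x0, #4]
    B[1] = {true, false, 2, false, true};   // store that may alias the load
    B[2] = {};                              // the ldrh itself
    std::printf("%d\n", findStoreFeedingLoad(B, 2, 0, 20)); // prints -1
    return 0;
  }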
> /// findMatchingInsn - Scan the instructions looking for a load/store that can
> /// be combined with the current instruction into a load/store pair.
> MachineBasicBlock::iterator
> @@ -1263,6 +1477,31 @@ MachineBasicBlock::iterator AArch64LoadS
> return E;
> }
>
> +bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
> + MachineBasicBlock::iterator &MBBI) {
> + MachineInstr *MI = MBBI;
> +  // If this is a volatile or otherwise ordered load, don't mess with it.
> + if (MI->hasOrderedMemoryRef())
> + return false;
> +
> + // Make sure this is a reg+imm.
> + // FIXME: It is possible to extend it to handle reg+reg cases.
> + if (!getLdStOffsetOp(MI).isImm())
> + return false;
> +
> + // Look backward up to ScanLimit instructions.
> + MachineBasicBlock::iterator StoreI;
> + if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
> + ++NumLoadsFromStoresPromoted;
> + // Promote the load. Keeping the iterator straight is a
> +    // pain, so we let the promotion routine tell us what the next instruction
> + // is after it's done mucking about.
> + MBBI = promoteLoadFromStore(MBBI, StoreI);
> + return true;
> + }
> + return false;
> +}
> +
> bool AArch64LoadStoreOpt::tryToMergeLdStInst(
> MachineBasicBlock::iterator &MBBI) {
> MachineInstr *MI = MBBI;
> @@ -1307,7 +1546,16 @@ bool AArch64LoadStoreOpt::optimizeBlock(
> bool enableNarrowLdOpt) {
> bool Modified = false;
>   // Four transformations to do here:
> - // 1) Find narrow loads that can be converted into a single wider load
> + // 1) Find loads that directly read from stores and promote them by
> + // replacing with mov instructions. If the store is wider than the load,
> + // the load will be replaced with a bitfield extract.
> + // e.g.,
> + // str w1, [x0, #4]
> + // ldrh w2, [x0, #6]
> + // ; becomes
> + // str w1, [x0, #4]
> + // lsr w2, w1, #16
> + // 2) Find narrow loads that can be converted into a single wider load
> // with bitfield extract instructions.
> // e.g.,
> // ldrh w0, [x2]
> @@ -1316,14 +1564,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(
> // ldr w0, [x2]
> // ubfx w1, w0, #16, #16
> // and w0, w0, #ffff
> - // 2) Find loads and stores that can be merged into a single load or store
> + // 3) Find loads and stores that can be merged into a single load or store
> // pair instruction.
> // e.g.,
> // ldr x0, [x2]
> // ldr x1, [x2, #8]
> // ; becomes
> // ldp x0, x1, [x2]
> - // 3) Find base register updates that can be merged into the load or store
> + // 4) Find base register updates that can be merged into the load or store
> // as a base-reg writeback.
> // e.g.,
> // ldr x0, [x2]
> @@ -1332,6 +1580,35 @@ bool AArch64LoadStoreOpt::optimizeBlock(
> // ldr x0, [x2], #4
>
> for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
> + MBBI != E;) {
> + MachineInstr *MI = MBBI;
> + switch (MI->getOpcode()) {
> + default:
> + // Just move on to the next instruction.
> + ++MBBI;
> + break;
> + // Scaled instructions.
> + case AArch64::LDRBBui:
> + case AArch64::LDRHHui:
> + case AArch64::LDRWui:
> + case AArch64::LDRXui:
> + // Unscaled instructions.
> + case AArch64::LDURBBi:
> + case AArch64::LDURHHi:
> + case AArch64::LDURWi:
> + case AArch64::LDURXi: {
> + if (tryToPromoteLoadFromStore(MBBI)) {
> + Modified = true;
> + break;
> + }
> + ++MBBI;
> + break;
> + }
> + // FIXME: Do the other instructions.
> + }
> + }
> +
> + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
> enableNarrowLdOpt && MBBI != E;) {
> MachineInstr *MI = MBBI;
> switch (MI->getOpcode()) {
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll Fri Dec 18 12:08:30 2015
> @@ -1,9 +1,9 @@
> ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
>
> ; CHECK: foo
> -; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
> -; CHECK: str w[[REG]], [x19, #132]
> -; CHECK: ldr w{{[0-9]+}}, [x19, #264]
> +; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
> +; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
> +; CHECK: str w[[REG1]], [x19, #132]
>
> define i32 @foo(i32 %a) nounwind {
> %retval = alloca i32, align 4
>
> Added: llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll?rev=256004&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll Fri Dec 18 12:08:30 2015
> @@ -0,0 +1,666 @@
> +; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s
> +
> +; CHECK-LABEL: Str64Ldr64
> +; CHECK: mov x0, x1
> +define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i64*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1
> + %1 = load i64, i64* %arrayidx1
> + ret i64 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr32_0
> +; CHECK: and x0, x1, #0xffffffff
> +define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i32*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr32_1
> +; CHECK: lsr x0, x1, #32
> +define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i32*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_0
> +; CHECK: and x0, x1, #0xffff
> +define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_1
> +; CHECK: ubfx x0, x1, #16, #16
> +define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_2
> +; CHECK: ubfx x0, x1, #32, #16
> +define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr16_3
> +; CHECK: lsr x0, x1, #48
> +define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_0
> +; CHECK: and x0, x1, #0xff
> +define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_1
> +; CHECK: ubfx x0, x1, #8, #8
> +define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_2
> +; CHECK: ubfx x0, x1, #16, #8
> +define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_3
> +; CHECK: ubfx x0, x1, #24, #8
> +define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_4
> +; CHECK: ubfx x0, x1, #32, #8
> +define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_5
> +; CHECK: ubfx x0, x1, #40, #8
> +define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_6
> +; CHECK: ubfx x0, x1, #48, #8
> +define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str64Ldr8_7
> +; CHECK: lsr x0, x1, #56
> +define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr32
> +; CHECK: mov w0, w1
> +define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i32*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr16_0
> +; CHECK: and w0, w1, #0xffff
> +define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr16_1
> +; CHECK: lsr w0, w1, #16
> +define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_2
> +; CHECK: ubfx w0, w1, #16, #8
> +define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str32Ldr8_3
> +; CHECK: lsr w0, w1, #24
> +define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr16
> +; CHECK: mov w0, w1
> +define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i16*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i8*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Str16Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i8*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr64
> +; CHECK: mov x0, x1
> +define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i64*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1
> + %1 = load i64, i64* %arrayidx1
> + ret i64 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr32_0
> +; CHECK: and x0, x1, #0xffffffff
> +define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i32*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr32_1
> +; CHECK: lsr x0, x1, #32
> +define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i32*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_0
> +; CHECK: and x0, x1, #0xffff
> +define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_1
> +; CHECK: ubfx x0, x1, #16, #16
> +define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_2
> +; CHECK: ubfx x0, x1, #32, #16
> +define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr16_3
> +; CHECK: lsr x0, x1, #48
> +define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i16*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_0
> +; CHECK: and x0, x1, #0xff
> +define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_1
> +; CHECK: ubfx x0, x1, #8, #8
> +define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_2
> +; CHECK: ubfx x0, x1, #16, #8
> +define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_3
> +; CHECK: ubfx x0, x1, #24, #8
> +define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_4
> +; CHECK: ubfx x0, x1, #32, #8
> +define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_5
> +; CHECK: ubfx x0, x1, #40, #8
> +define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_6
> +; CHECK: ubfx x0, x1, #48, #8
> +define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str64Ldr8_7
> +; CHECK: lsr x0, x1, #56
> +define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
> +entry:
> + %0 = bitcast i64* %P to i8*
> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
> + store i64 %v, i64* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr32
> +; CHECK: mov w0, w1
> +define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i32*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
> + %1 = load i32, i32* %arrayidx1
> + ret i32 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr16_0
> +; CHECK: and w0, w1, #0xffff
> +define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr16_1
> +; CHECK: lsr w0, w1, #16
> +define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_2
> +; CHECK: ubfx w0, w1, #16, #8
> +define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str32Ldr8_3
> +; CHECK: lsr w0, w1, #24
> +define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i8*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr16
> +; CHECK: mov w0, w1
> +define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i16*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr8_0
> +; CHECK: and w0, w1, #0xff
> +define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i8*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_Str16Ldr8_1
> +; CHECK: ubfx w0, w1, #8, #8
> +define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
> +entry:
> + %0 = bitcast i16* %P to i8*
> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
> + store i16 %v, i16* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
> + %1 = load i8, i8* %arrayidx1
> + ret i8 %1
> +}
> +
> +; CHECK-LABEL: StrVolatileLdr
> +; CHECK: ldrh
> +define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> + %1 = load volatile i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: StrNotInRangeLdr
> +; CHECK: ldrh
> +define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: Unscaled_StrNotInRangeLdr
> +; CHECK: ldurh
> +define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
> + store i32 %v, i32* %arrayidx0
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +; CHECK-LABEL: StrCallLdr
> +; CHECK: ldrh
> +define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + %c = call i1 @test_dummy()
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
> +
> +declare i1 @test_dummy()
> +
> +; CHECK-LABEL: StrStrLdr
> +; CHECK: ldrh
> +define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) {
> +entry:
> + %0 = bitcast i32* %P to i16*
> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
> + store i32 %v, i32* %arrayidx0
> + store i32 %n, i32* %P2
> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
> + %1 = load i16, i16* %arrayidx1
> + ret i16 %1
> +}
>
> Modified: llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll?rev=256004&r1=256003&r2=256004&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll Fri Dec 18 12:08:30 2015
> @@ -27,8 +27,8 @@ define i64 @test_chains() {
>
> ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
> ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
> -; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
> -; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
> +; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
> +; CHECK: mov {{w[0-9]+}}, w[[STRVAL]]
>
> %ret.1 = load i8, i8* %locvar
> %ret.2 = zext i8 %ret.1 to i64
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits