[llvm] r256004 - [AArch64] Promote loads from stores
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 21 07:19:51 PST 2015
I did run ref on the A57, but not on the A53. Let me try it on the A53.
Thanks for finding this.
Best,
Jun
-----Original Message-----
From: Charlie Turner [mailto:charlesturner7c5 at gmail.com]
Sent: Monday, December 21, 2015 10:13 AM
To: Jun Bum Lim
Cc: llvm-commits at lists.llvm.org
Subject: Re: [llvm] r256004 - [AArch64] Promote loads from stores
The miscompares occur on the Cortex-A53 in A64 mode.
On 21 December 2015 at 15:11, Charlie Turner <charlesturner7c5 at gmail.com> wrote:
> Hi Jun,
> This commit causes miscompares in spec2000's gcc benchmark. Do you run
> this in ref mode? In particular, 200.s and scilab.s.
>
> --Charlie.
>
> On 18 December 2015 at 18:08, Jun Bum Lim via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>> Author: junbuml
>> Date: Fri Dec 18 12:08:30 2015
>> New Revision: 256004
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=256004&view=rev
>> Log:
>> [AArch64] Promote loads from stores
>>
>> This change promotes load instructions which directly read from
>> stores by replacing them with mov instructions. If the store is wider
>> than the load, the load will be replaced with a bitfield extract.
>> For example :
>> STRWui %W1, %X0, 1
>> %W0 = LDRHHui %X0, 3
>> becomes
>> STRWui %W1, %X0, 1
>> %W0 = UBFMWri %W1, 16, 31
>>
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
>> Modified:
>> llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
>> llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
>> llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
>>
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=256004&r1=256003&r2=256004&view=diff
>> =====================================================================
>> =========
>> --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
>> (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri
>> +++ Dec 18 12:08:30 2015
>> @@ -43,6 +43,7 @@ STATISTIC(NumUnscaledPairCreated,
>> "Number of load/store from unscaled generated");
>> STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
>> STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
>> +STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
>>
>> static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
>> cl::init(20), cl::Hidden);
>> @@ -93,6 +94,12 @@ struct AArch64LoadStoreOpt : public Mach
>> MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
>> LdStPairFlags &Flags,
>> unsigned Limit);
>> +
>> + // Scan the instructions looking for a store that writes to the address from
>> + // which the current load instruction reads. Return true if one is found.
>> + bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
>> + MachineBasicBlock::iterator &StoreI);
>> +
>> // Merge the two instructions indicated into a single pair-wise instruction.
>> // If MergeForward is true, erase the first instruction and fold its
>> // operation into the second. If false, the reverse. Return the instruction
>> @@ -102,6 +109,11 @@ struct AArch64LoadStoreOpt : public Mach
>> MachineBasicBlock::iterator Paired,
>> const LdStPairFlags &Flags);
>>
>> + // Promote the load that reads directly from the address stored to.
>> + MachineBasicBlock::iterator
>> + promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
>> + MachineBasicBlock::iterator StoreI);
>> +
>> // Scan the instruction list to find a base register update that can
>> // be combined with the current instruction (a load or store) using
>> // pre or post indexed addressing with writeback. Scan forwards.
>> @@ -128,6 +140,9 @@ struct AArch64LoadStoreOpt : public Mach
>> // Find and merge foldable ldr/str instructions.
>> bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
>>
>> + // Find and promote load instructions which read directly from store.
>> + bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
>> +
>> // Check if converting two narrow loads into a single wider load with
>> // bitfield extracts could be enabled.
>> bool enableNarrowLdMerge(MachineFunction &Fn);
>> @@ -399,6 +414,36 @@ static unsigned getMatchingPairOpcode(un
>> }
>> }
>>
>> +static unsigned isMatchingStore(MachineInstr *LoadInst,
>> + MachineInstr *StoreInst) {
>> + unsigned LdOpc = LoadInst->getOpcode();
>> + unsigned StOpc = StoreInst->getOpcode();
>> + switch (LdOpc) {
>> + default:
>> + llvm_unreachable("Unsupported load instruction!");
>> + case AArch64::LDRBBui:
>> + return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
>> + StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
>> + case AArch64::LDURBBi:
>> + return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
>> + StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
>> + case AArch64::LDRHHui:
>> + return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
>> + StOpc == AArch64::STRXui;
>> + case AArch64::LDURHHi:
>> + return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
>> + StOpc == AArch64::STURXi;
>> + case AArch64::LDRWui:
>> + return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
>> + case AArch64::LDURWi:
>> + return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
>> + case AArch64::LDRXui:
>> + return StOpc == AArch64::STRXui;
>> + case AArch64::LDURXi:
>> + return StOpc == AArch64::STURXi;
>> + }
>> +}
>> +
>> static unsigned getPreIndexedOpcode(unsigned Opc) {
>> switch (Opc) {
>> default:
>> @@ -553,6 +598,21 @@ static const MachineOperand &getLdStOffs
>> return MI->getOperand(Idx);
>> }
>>
>> +static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
>> + MachineInstr *StoreInst) {
>> + assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
>> + int LoadSize = getMemScale(LoadInst);
>> + int StoreSize = getMemScale(StoreInst);
>> + int UnscaledStOffset = isUnscaledLdSt(StoreInst)
>> + ? getLdStOffsetOp(StoreInst).getImm()
>> + : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
>> + int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
>> + ? getLdStOffsetOp(LoadInst).getImm()
>> + : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
>> + return (UnscaledStOffset <= UnscaledLdOffset) &&
>> + (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
>> +}
>> +
>> // Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
>> static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
>> MachineInstr *Op1) {
>> @@ -800,6 +860,106 @@ AArch64LoadStoreOpt::mergePairedInsns(Ma
>> return NextI;
>> }
>>
>> +MachineBasicBlock::iterator
>> +AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
>> + MachineBasicBlock::iterator StoreI) {
>> + MachineBasicBlock::iterator NextI = LoadI;
>> + ++NextI;
>> +
>> + int LoadSize = getMemScale(LoadI);
>> + int StoreSize = getMemScale(StoreI);
>> + unsigned LdRt = getLdStRegOp(LoadI).getReg();
>> + unsigned StRt = getLdStRegOp(StoreI).getReg();
>> + bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
>> +
>> + assert((IsStoreXReg ||
>> + TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
>> + "Unexpected RegClass");
>> +
>> + MachineInstr *BitExtMI;
>> + if (LoadSize == StoreSize) {
>> + // Remove the load, if the destination register of the loads is the same
>> + // register for stored value.
>> + if (StRt == LdRt) {
>> + DEBUG(dbgs() << "Remove load instruction:\n ");
>> + DEBUG(LoadI->print(dbgs()));
>> + DEBUG(dbgs() << "\n");
>> + LoadI->eraseFromParent();
>> + return NextI;
>> + }
>> + // Replace the load with a mov if the load and store are in the same size.
>> + BitExtMI =
>> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
>> + TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
>> + .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
>> + .addReg(StRt)
>> + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
>> + } else {
>> + // FIXME: Currently we disable this transformation in big-endian targets as
>> + // performance and correctness are verified only in little-endian.
>> + if (!Subtarget->isLittleEndian())
>> + return NextI;
>> + bool IsUnscaled = isUnscaledLdSt(LoadI);
>> + assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
>> + assert(LoadSize < StoreSize && "Invalid load size");
>> + int UnscaledLdOffset = IsUnscaled
>> + ? getLdStOffsetOp(LoadI).getImm()
>> + : getLdStOffsetOp(LoadI).getImm() * LoadSize;
>> + int UnscaledStOffset = IsUnscaled
>> + ? getLdStOffsetOp(StoreI).getImm()
>> + : getLdStOffsetOp(StoreI).getImm() * StoreSize;
>> + int Width = LoadSize * 8;
>> + int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
>> + int Imms = Immr + Width - 1;
>> + unsigned DestReg = IsStoreXReg
>> + ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
>> + &AArch64::GPR64RegClass)
>> + : LdRt;
>> +
>> + assert(((UnscaledLdOffset) >= UnscaledStOffset &&
>> + (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
>> + "Invalid offset");
>> +
>> + Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
>> + Imms = Immr + Width - 1;
>> + if (UnscaledLdOffset == UnscaledStOffset) {
>> + uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
>> + | ((Immr) << 6) // immr
>> + | ((Imms) << 0) // imms
>> + ;
>> +
>> + BitExtMI =
>> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
>> + TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
>> + DestReg)
>> + .addReg(StRt)
>> + .addImm(AndMaskEncoded);
>> + } else {
>> + BitExtMI =
>> + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
>> + TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
>> + DestReg)
>> + .addReg(StRt)
>> + .addImm(Immr)
>> + .addImm(Imms);
>> + }
>> + }
>> +
>> + DEBUG(dbgs() << "Promoting load by replacing :\n ");
>> + DEBUG(StoreI->print(dbgs()));
>> + DEBUG(dbgs() << " ");
>> + DEBUG(LoadI->print(dbgs()));
>> + DEBUG(dbgs() << " with instructions:\n ");
>> + DEBUG(StoreI->print(dbgs()));
>> + DEBUG(dbgs() << " ");
>> + DEBUG((BitExtMI)->print(dbgs()));
>> + DEBUG(dbgs() << "\n");
>> +
>> + // Erase the old instructions.
>> + LoadI->eraseFromParent();
>> + return NextI;
>> +}
>> +
>> /// trackRegDefsUses - Remember what registers the specified instruction uses
>> /// and modifies.
>> static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
>> @@ -863,6 +1023,60 @@ static bool mayAlias(MachineInstr *MIa,
>> return false;
>> }
>>
>> +bool AArch64LoadStoreOpt::findMatchingStore(
>> + MachineBasicBlock::iterator I, unsigned Limit,
>> + MachineBasicBlock::iterator &StoreI) {
>> + MachineBasicBlock::iterator E = I->getParent()->begin();
>> + MachineBasicBlock::iterator MBBI = I;
>> + MachineInstr *FirstMI = I;
>> + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
>> +
>> + // Track which registers have been modified and used between the first insn
>> + // and the second insn.
>> + BitVector ModifiedRegs, UsedRegs;
>> + ModifiedRegs.resize(TRI->getNumRegs());
>> + UsedRegs.resize(TRI->getNumRegs());
>> +
>> + for (unsigned Count = 0; MBBI != E && Count < Limit;) {
>> + --MBBI;
>> + MachineInstr *MI = MBBI;
>> + // Skip DBG_VALUE instructions. Otherwise debug info can affect the
>> + // optimization by changing how far we scan.
>> + if (MI->isDebugValue())
>> + continue;
>> + // Now that we know this is a real instruction, count it.
>> + ++Count;
>> +
>> + // If the load instruction reads directly from the address to which the
>> + // store instruction writes and the stored value is not modified, we can
>> + // promote the load. Since we do not handle stores with pre-/post-index,
>> + // it's unnecessary to check if BaseReg is modified by the store itself.
>> + if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
>> + BaseReg == getLdStBaseOp(MI).getReg() &&
>> + isLdOffsetInRangeOfSt(FirstMI, MI) &&
>> + !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
>> + StoreI = MBBI;
>> + return true;
>> + }
>> +
>> + if (MI->isCall())
>> + return false;
>> +
>> + // Update modified / uses register lists.
>> + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
>> +
>> + // Otherwise, if the base register is modified, we have no match, so
>> + // return early.
>> + if (ModifiedRegs[BaseReg])
>> + return false;
>> +
>> + // If we encounter a store aliased with the load, return early.
>> + if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
>> + return false;
>> + }
>> + return false;
>> +}
>> +
>> /// findMatchingInsn - Scan the instructions looking for a load/store that can
>> /// be combined with the current instruction into a load/store pair.
>> MachineBasicBlock::iterator
>> @@ -1263,6 +1477,31 @@ MachineBasicBlock::iterator AArch64LoadS
>> return E;
>> }
>>
>> +bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
>> + MachineBasicBlock::iterator &MBBI) {
>> + MachineInstr *MI = MBBI;
>> + // If this is a volatile load, don't mess with it.
>> + if (MI->hasOrderedMemoryRef())
>> + return false;
>> +
>> + // Make sure this is a reg+imm.
>> + // FIXME: It is possible to extend it to handle reg+reg cases.
>> + if (!getLdStOffsetOp(MI).isImm())
>> + return false;
>> +
>> + // Look backward up to ScanLimit instructions.
>> + MachineBasicBlock::iterator StoreI;
>> + if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
>> + ++NumLoadsFromStoresPromoted;
>> + // Promote the load. Keeping the iterator straight is a
>> + // pain, so we let the merge routine tell us what the next instruction
>> + // is after it's done mucking about.
>> + MBBI = promoteLoadFromStore(MBBI, StoreI);
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> bool AArch64LoadStoreOpt::tryToMergeLdStInst(
>> MachineBasicBlock::iterator &MBBI) {
>> MachineInstr *MI = MBBI;
>> @@ -1307,7 +1546,16 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>> bool enableNarrowLdOpt) {
>> bool Modified = false;
>> // Three tranformations to do here:
>> - // 1) Find narrow loads that can be converted into a single wider load
>> + // 1) Find loads that directly read from stores and promote them by
>> + // replacing with mov instructions. If the store is wider than the load,
>> + // the load will be replaced with a bitfield extract.
>> + // e.g.,
>> + // str w1, [x0, #4]
>> + // ldrh w2, [x0, #6]
>> + // ; becomes
>> + // str w1, [x0, #4]
>> + // lsr w2, w1, #16
>> + // 2) Find narrow loads that can be converted into a single wider load
>> // with bitfield extract instructions.
>> // e.g.,
>> // ldrh w0, [x2]
>> @@ -1316,14 +1564,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>> // ldr w0, [x2]
>> // ubfx w1, w0, #16, #16
>> // and w0, w0, #ffff
>> - // 2) Find loads and stores that can be merged into a single load or store
>> + // 3) Find loads and stores that can be merged into a single load or store
>> // pair instruction.
>> // e.g.,
>> // ldr x0, [x2]
>> // ldr x1, [x2, #8]
>> // ; becomes
>> // ldp x0, x1, [x2]
>> - // 3) Find base register updates that can be merged into the load or store
>> + // 4) Find base register updates that can be merged into the load or store
>> // as a base-reg writeback.
>> // e.g.,
>> // ldr x0, [x2]
>> @@ -1332,6 +1580,35 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>> // ldr x0, [x2], #4
>>
>> for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
>> + MBBI != E;) {
>> + MachineInstr *MI = MBBI;
>> + switch (MI->getOpcode()) {
>> + default:
>> + // Just move on to the next instruction.
>> + ++MBBI;
>> + break;
>> + // Scaled instructions.
>> + case AArch64::LDRBBui:
>> + case AArch64::LDRHHui:
>> + case AArch64::LDRWui:
>> + case AArch64::LDRXui:
>> + // Unscaled instructions.
>> + case AArch64::LDURBBi:
>> + case AArch64::LDURHHi:
>> + case AArch64::LDURWi:
>> + case AArch64::LDURXi: {
>> + if (tryToPromoteLoadFromStore(MBBI)) {
>> + Modified = true;
>> + break;
>> + }
>> + ++MBBI;
>> + break;
>> + }
>> + // FIXME: Do the other instructions.
>> + }
>> + }
>> +
>> + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
>> enableNarrowLdOpt && MBBI != E;) {
>> MachineInstr *MI = MBBI;
>> switch (MI->getOpcode()) {
>>
>> Modified:
>> llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll?rev=256004&r1=256003&r2=256004&view=diff
>> =====================================================================
>> =========
>> ---
>> llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
>> (original)
>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset
>> +++ .ll Fri Dec 18 12:08:30 2015
>> @@ -1,9 +1,9 @@
>> ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
>>
>> ; CHECK: foo
>> -; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
>> -; CHECK: str w[[REG]], [x19, #132]
>> -; CHECK: ldr w{{[0-9]+}}, [x19, #264]
>> +; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
>> +; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
>> +; CHECK: str w[[REG1]], [x19, #132]
>>
>> define i32 @foo(i32 %a) nounwind {
>> %retval = alloca i32, align 4
>>
>> Added: llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll?rev=256004&view=auto
>> =====================================================================
>> =========
>> --- llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll (added)
>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-ld-from-st.ll Fri Dec 18
>> +++ 12:08:30 2015
>> @@ -0,0 +1,666 @@
>> +; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s
>> +
>> +; CHECK-LABEL: Str64Ldr64
>> +; CHECK: mov x0, x1
>> +define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i64*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1
>> + %1 = load i64, i64* %arrayidx1
>> + ret i64 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr32_0
>> +; CHECK: and x0, x1, #0xffffffff
>> +define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr32_1
>> +; CHECK: lsr x0, x1, #32
>> +define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr16_0
>> +; CHECK: and x0, x1, #0xffff
>> +define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr16_1
>> +; CHECK: ubfx x0, x1, #16, #16
>> +define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr16_2
>> +; CHECK: ubfx x0, x1, #32, #16
>> +define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr16_3
>> +; CHECK: lsr x0, x1, #48
>> +define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_0
>> +; CHECK: and x0, x1, #0xff
>> +define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_1
>> +; CHECK: ubfx x0, x1, #8, #8
>> +define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_2
>> +; CHECK: ubfx x0, x1, #16, #8
>> +define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_3
>> +; CHECK: ubfx x0, x1, #24, #8
>> +define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_4
>> +; CHECK: ubfx x0, x1, #32, #8
>> +define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_5
>> +; CHECK: ubfx x0, x1, #40, #8
>> +define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_6
>> +; CHECK: ubfx x0, x1, #48, #8
>> +define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str64Ldr8_7
>> +; CHECK: lsr x0, x1, #56
>> +define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr32
>> +; CHECK: mov w0, w1
>> +define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr16_0
>> +; CHECK: and w0, w1, #0xffff
>> +define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr16_1
>> +; CHECK: lsr w0, w1, #16
>> +define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr8_0
>> +; CHECK: and w0, w1, #0xff
>> +define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr8_1
>> +; CHECK: ubfx w0, w1, #8, #8
>> +define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr8_2
>> +; CHECK: ubfx w0, w1, #16, #8
>> +define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str32Ldr8_3
>> +; CHECK: lsr w0, w1, #24
>> +define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str16Ldr16
>> +; CHECK: mov w0, w1
>> +define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Str16Ldr8_0
>> +; CHECK: and w0, w1, #0xff
>> +define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Str16Ldr8_1
>> +; CHECK: ubfx w0, w1, #8, #8
>> +define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr64
>> +; CHECK: mov x0, x1
>> +define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i64*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1
>> + %1 = load i64, i64* %arrayidx1
>> + ret i64 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr32_0
>> +; CHECK: and x0, x1, #0xffffffff
>> +define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr32_1
>> +; CHECK: lsr x0, x1, #32
>> +define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr16_0
>> +; CHECK: and x0, x1, #0xffff
>> +define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr16_1
>> +; CHECK: ubfx x0, x1, #16, #16
>> +define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr16_2
>> +; CHECK: ubfx x0, x1, #32, #16
>> +define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr16_3
>> +; CHECK: lsr x0, x1, #48
>> +define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_0
>> +; CHECK: and x0, x1, #0xff
>> +define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_1
>> +; CHECK: ubfx x0, x1, #8, #8
>> +define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_2
>> +; CHECK: ubfx x0, x1, #16, #8
>> +define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_3
>> +; CHECK: ubfx x0, x1, #24, #8
>> +define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_4
>> +; CHECK: ubfx x0, x1, #32, #8
>> +define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_5
>> +; CHECK: ubfx x0, x1, #40, #8
>> +define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_6
>> +; CHECK: ubfx x0, x1, #48, #8
>> +define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str64Ldr8_7
>> +; CHECK: lsr x0, x1, #56
>> +define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i64* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
>> + store i64 %v, i64* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr32
>> +; CHECK: mov w0, w1
>> +define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i32*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
>> + %1 = load i32, i32* %arrayidx1
>> + ret i32 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr16_0
>> +; CHECK: and w0, w1, #0xffff
>> +define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr16_1
>> +; CHECK: lsr w0, w1, #16
>> +define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr8_0
>> +; CHECK: and w0, w1, #0xff
>> +define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr8_1
>> +; CHECK: ubfx w0, w1, #8, #8
>> +define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr8_2
>> +; CHECK: ubfx w0, w1, #16, #8
>> +define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str32Ldr8_3
>> +; CHECK: lsr w0, w1, #24
>> +define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str16Ldr16
>> +; CHECK: mov w0, w1
>> +define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str16Ldr8_0
>> +; CHECK: and w0, w1, #0xff
>> +define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_Str16Ldr8_1
>> +; CHECK: ubfx w0, w1, #8, #8
>> +define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i16* %P to i8*
>> + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
>> + store i16 %v, i16* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
>> + %1 = load i8, i8* %arrayidx1
>> + ret i8 %1
>> +}
>> +
>> +; CHECK-LABEL: StrVolatileLdr
>> +; CHECK: ldrh
>> +define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
>> + %1 = load volatile i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: StrNotInRangeLdr
>> +; CHECK: ldrh
>> +define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: Unscaled_StrNotInRangeLdr
>> +; CHECK: ldurh
>> +define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
>> + store i32 %v, i32* %arrayidx0
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +; CHECK-LABEL: StrCallLdr
>> +; CHECK: ldrh
>> +define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + %c = call i1 @test_dummy()
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>> +
>> +declare i1 @test_dummy()
>> +
>> +; CHECK-LABEL: StrStrLdr
>> +; CHECK: ldrh
>> +define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) {
>> +entry:
>> + %0 = bitcast i32* %P to i16*
>> + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
>> + store i32 %v, i32* %arrayidx0
>> + store i32 %n, i32* %P2
>> + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
>> + %1 = load i16, i16* %arrayidx1
>> + ret i16 %1
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll?rev=256004&r1=256003&r2=256004&view=diff
>> =====================================================================
>> =========
>> --- llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll (original)
>> +++ llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll Fri Dec 18 12:08:30 2015
>> @@ -27,8 +27,8 @@ define i64 @test_chains() {
>>
>> ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
>> ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
>> -; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
>> -; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
>> +; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
>> +; CHECK: mov {{w[0-9]+}}, w[[STRVAL]]
>>
>> %ret.1 = load i8, i8* %locvar
>> %ret.2 = zext i8 %ret.1 to i64
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list