[llvm] 8a12553 - [ARM] Improve codegen of volatile load/store of i64
Victor Campos via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 03:19:34 PDT 2020
Author: Victor Campos
Date: 2020-03-11T10:19:27Z
New Revision: 8a12553223180246eeafaa0fa7bfa11e834d34b6
URL: https://github.com/llvm/llvm-project/commit/8a12553223180246eeafaa0fa7bfa11e834d34b6
DIFF: https://github.com/llvm/llvm-project/commit/8a12553223180246eeafaa0fa7bfa11e834d34b6.diff
LOG: [ARM] Improve codegen of volatile load/store of i64
Summary:
Instead of generating two i32 instructions for each load or store of a volatile
i64 value (two LDRs or STRs), now emit LDRD/STRD.
These improvements cover architectures implementing ARMv5TE or Thumb-2.
The code generation deliberately avoids the register-offset variant of
LDRD/STRD. In this variant, the register allocated to the register offset
cannot be reused in any of the remaining operands. Such a restriction
seems to be non-trivial to implement in LLVM, so it is left as a
to-do.
Reviewers: dmgreen, efriedma, john.brawn, nickdesaulniers
Reviewed By: efriedma, nickdesaulniers
Subscribers: danielkiss, alanphipps, hans, nathanchance, nickdesaulniers, vvereschaka, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70072
Added:
llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
Modified:
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 25eabf53dee0..4dcb1f62577a 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1955,6 +1955,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::LOADDUAL:
+ case ARM::STOREDUAL: {
+ Register PairReg = MI.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0)
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0);
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ MIB.add(MI.getOperand(i));
+ MIB.add(predOps(ARMCC::AL));
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return true;
+ }
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 69f4ceb5c924..239707881ff3 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -145,6 +145,8 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
// Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ template <unsigned Shift>
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -1303,6 +1305,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return true;
}
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ OffImm =
+ CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
@@ -3583,6 +3612,59 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
+ case ARMISD::LDRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(1);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of LDRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
+ SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
+ {MVT::Untyped, MVT::Other}, Ops);
+ SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ SDValue(New, 0));
+ SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ SDValue(New, 0));
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), Lo);
+ ReplaceUses(SDValue(N, 1), Hi);
+ ReplaceUses(SDValue(N, 2), SDValue(New, 1));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
+ case ARMISD::STRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(3);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of STRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
+ SDNode *RegPair =
+ createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
+ SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 0));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5ac30801858f..676f45a52c03 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1074,6 +1074,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1611,6 +1613,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::LDRD: return "ARMISD::LDRD";
+ case ARMISD::STRD: return "ARMISD::STRD";
+
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@@ -9104,6 +9109,25 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
}
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && LD->isVolatile()) {
+ SDLoc dl(N);
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+ {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
+ SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
+ SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ Results.append({Pair, Result.getValue(2)});
+ }
+}
+
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
@@ -9133,6 +9157,38 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT MemVT = ST->getMemoryVT();
+ assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && ST->isVolatile()) {
+ SDNode *N = Op.getNode();
+ SDLoc dl(N);
+
+ SDValue Lo = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
+ MVT::i32));
+ SDValue Hi = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
+ MVT::i32));
+
+ return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
+ {ST->getChain(), Lo, Hi, ST->getBasePtr()},
+ MemVT, ST->getMemOperand());
+ } else if (Subtarget->hasMVEIntegerOps() &&
+ ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1))) {
+ return LowerPredicateStore(Op, DAG);
+ }
+
+ return SDValue();
+}
+
static bool isZeroVector(SDValue N) {
return (ISD::isBuildVectorAllZeros(N.getNode()) ||
(N->getOpcode() == ARMISD::VMOVIMM &&
@@ -9366,7 +9422,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::STORE:
- return LowerPredicateStore(Op, DAG);
+ return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::ATOMIC_LOAD:
@@ -9470,7 +9526,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ABS:
lowerABS(N, Results, DAG);
return ;
-
+ case ISD::LOAD:
+ LowerLOAD(N, Results, DAG);
+ break;
}
if (Res.getNode())
Results.push_back(Res);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index c15c439ddb23..61a8d53ac6e6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -295,7 +295,11 @@ class VectorType;
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD
+ VST4LN_UPD,
+
+ // Load/Store of dual registers
+ LDRD,
+ STRD
};
} // end namespace ARMISD
@@ -752,6 +756,8 @@ class VectorType;
SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
+ void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 9e491e726b41..b006b5e7e08f 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -243,6 +243,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
+def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
// Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -2729,6 +2735,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]>;
}
+let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
+ 64, IIC_iLoad_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "lda", "\t$Rt, $addr", []>;
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -3004,6 +3018,14 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
}
}
+let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
+ 64, IIC_iStore_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
// Indexed stores
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c6e2e04965d2..d69526b32c7c 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand {
+class T2AddrMode_Imm8s4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1448,7 +1449,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
+ [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1629,7 +1631,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
+ [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
Sched<[WriteST]>;
// Indexed stores
diff --git a/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
new file mode 100644
index 000000000000..e025c854409b
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
@@ -0,0 +1,183 @@
+; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE,CHECK
+; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2,CHECK
+; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T,CHECK
+
+@x = common dso_local global i64 0, align 8
+@y = common dso_local global i64 0, align 8
+
+define void @test() {
+entry:
+; CHECK-LABEL: test:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #4]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
+ %0 = load volatile i64, i64* @x, align 8
+ store volatile i64 %0, i64* @y, align 8
+ ret void
+}
+
+define void @test_offset() {
+entry:
+; CHECK-LABEL: test_offset:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #-4]
+ %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 -4) to i64*), align 8
+ store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 -4) to i64*), align 8
+ ret void
+}
+
+define void @test_offset_1() {
+; CHECK-LABEL: test_offset_1:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #255]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #255]
+; CHECK-T2: adds [[ADDR0:r[0-9]+]], #255
+; CHECK-T2-NEXT: adds [[ADDR1:r[0-9]+]], #255
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #255]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #259]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #259]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #255]
+entry:
+ %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 255) to i64*), align 8
+ store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 255) to i64*), align 8
+ ret void
+}
+
+define void @test_offset_2() {
+; CHECK-LABEL: test_offset_2:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #256
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #256
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #256]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #256]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #256]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #260]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #260]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #256]
+entry:
+ %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 256) to i64*), align 8
+ store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 256) to i64*), align 8
+ ret void
+}
+
+define void @test_offset_3() {
+; CHECK-LABEL: test_offset_3:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1020
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1020
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #1020]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1020]
+entry:
+ %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1020) to i64*), align 8
+ store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1020) to i64*), align 8
+ ret void
+}
+
+define void @test_offset_4() {
+; CHECK-LABEL: test_offset_4:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1024
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1024
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: add.w [[ADDR0]], [[ADDR0]], #1024
+; CHECK-T2-NEXT: add.w [[ADDR1]], [[ADDR1]], #1024
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1024]
+entry:
+ %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1024) to i64*), align 8
+ store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1024) to i64*), align 8
+ ret void
+}
+
+define void @test_stack() {
+; CHECK-LABEL: test_stack:
+; CHECK-ARMV5TE: sub sp, sp, #8
+; CHECK-ARMV5TE-NEXT: mov r1, #0
+; CHECK-ARMV5TE-NEXT: mov r0, #5
+; CHECK-ARMV5TE-NEXT: strd r0, r1, [sp]
+; CHECK-ARMV5TE-NEXT: ldrd r0, r1, [sp]
+; CHECK-T2: sub sp, #8
+; CHECK-T2-NEXT: mov r0, sp
+; CHECK-T2-NEXT: movs r1, #0
+; CHECK-T2-NEXT: movs r2, #5
+; CHECK-T2-NEXT: strd r2, r1, [r0]
+; CHECK-T2-NEXT: ldrd r0, r1, [r0]
+; CHECK-ARMV4T: sub sp, sp, #8
+; CHECK-ARMV4T-NEXT: mov r0, #0
+; CHECK-ARMV4T-NEXT: str r0, [sp, #4]
+; CHECK-ARMV4T-NEXT: mov r0, #5
+; CHECK-ARMV4T-NEXT: str r0, [sp]
+; CHECK-ARMV4T-NEXT: ldr r0, [sp]
+; CHECK-ARMV4T-NEXT: ldr r0, [sp, #4]
+entry:
+ %0 = alloca i64
+ store volatile i64 5, i64* %0
+ %1 = load volatile i64, i64* %0
+ ret void
+}
More information about the llvm-commits
mailing list