[llvm] r330630 - Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses.", with a fix for the bot failure.
Peter Collingbourne via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 23 12:09:34 PDT 2018
Author: pcc
Date: Mon Apr 23 12:09:34 2018
New Revision: 330630
URL: http://llvm.org/viewvc/llvm-project?rev=330630&view=rev
Log:
Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses.", with a fix for the bot failure.
This reland adds a check that prevents the DAG combiner from folding an
offset that is smaller than the existing one. Folding a smaller offset can
cause the DAG to oscillate between two equivalent forms, which was the cause
of the hang and later assertion failure observed on the
lnt-ctmark-aarch64-O3-flto bot.
http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/
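For context, the new combine (performGlobalAddressCombine in the diff below)
requires every use of a globaladdr node to be of the form
(add globaladdr, constant), folds the smallest such constant into the
globaladdr's offset, and compensates with a sub so the other uses stay
correct. A simplified sketch of the intended rewrite; the global @g and the
offsets are made up for illustration, and cleanup of the compensating sub is
left to later combines:

  uses before the combine:    (add (globaladdr @g), 16)
                              (add (globaladdr @g), 24)

  globaladdr rewritten to:    (sub (globaladdr @g + 16), 16)

  uses after simplification:  (globaladdr @g + 16)
                              (add (globaladdr @g + 16), 8)

The check added by this reland only performs the rewrite when the folded
offset is strictly larger than the offset already on the node, so repeated
applications make monotone progress and cannot flip back and forth between
two equivalent DAGs.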
Original commit message:
> This is a code size win in code that frequently takes offset
> addresses, such as C++ constructors that typically need to compute
> an offset address of a vtable. This reduces the size of Chromium
> for Android's .text section by 108KB.
Differential Revision: https://reviews.llvm.org/D45199
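To illustrate the expected codegen effect (this mirrors the f1/f2 cases in
the new fold-global-offsets.ll test; the register choices are just examples),
a load from a hidden global at a small constant offset would previously be
lowered along the lines of:

  adrp x8, x1
  add  x8, x8, :lo12:x1
  ldr  x0, [x8, #16]

With the offset folded into the adrp/:lo12: relocations this becomes:

  adrp x8, x1+16
  ldr  x0, [x8, :lo12:x1+16]

The vtable stores emitted by C++ constructors follow the same
global-plus-constant pattern, which is where the Chromium size win comes
from.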
Added:
llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll
llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll
llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp Mon Apr 23 12:09:34 2018
@@ -743,14 +743,16 @@ bool AArch64DAGToDAGISel::SelectAddrMode
if (!GAN)
return true;
- const GlobalValue *GV = GAN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- Type *Ty = GV->getValueType();
- if (Alignment == 0 && Ty->isSized())
- Alignment = DL.getABITypeAlignment(Ty);
+ if (GAN->getOffset() % Size == 0) {
+ const GlobalValue *GV = GAN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Type *Ty = GV->getValueType();
+ if (Alignment == 0 && Ty->isSized())
+ Alignment = DL.getABITypeAlignment(Ty);
- if (Alignment >= Size)
- return true;
+ if (Alignment >= Size)
+ return true;
+ }
}
if (CurDAG->isBaseWithConstantOffset(N)) {
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Apr 23 12:09:34 2018
@@ -577,6 +577,8 @@ AArch64TargetLowering::AArch64TargetLowe
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::GlobalAddress);
+
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3677,7 +3679,8 @@ AArch64TargetLowering::LowerReturn(SDVal
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
- return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
+ N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3752,8 +3755,9 @@ SDValue AArch64TargetLowering::LowerGlob
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
- assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
- "unexpected offset in global node");
+ if (OpFlags != AArch64II::MO_NO_FLAG)
+ assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+ "unexpected offset in global node");
// This also catches the large code model case for Darwin.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4991,10 +4995,8 @@ SDValue AArch64TargetLowering::LowerShif
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
- DEBUG(dbgs() << "Skipping offset folding global address: ");
- DEBUG(GA->dump());
- DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
- "addresses\n");
+ // Offsets are folded in the DAG combine rather than here so that we can
+ // intelligently choose an offset based on the uses.
return false;
}
@@ -10617,6 +10619,59 @@ static SDValue performNVCASTCombine(SDNo
return SDValue();
}
+// If all users of the globaladdr are of the form (globaladdr + constant), find
+// the smallest constant, fold it into the globaladdr's offset and rewrite the
+// globaladdr as (globaladdr + constant) - constant.
+static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget,
+ const TargetMachine &TM) {
+ auto *GN = dyn_cast<GlobalAddressSDNode>(N);
+ if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+ AArch64II::MO_NO_FLAG)
+ return SDValue();
+
+ uint64_t MinOffset = -1ull;
+ for (SDNode *N : GN->uses()) {
+ if (N->getOpcode() != ISD::ADD)
+ return SDValue();
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
+ if (!C)
+ C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ MinOffset = std::min(MinOffset, C->getZExtValue());
+ }
+ uint64_t Offset = MinOffset + GN->getOffset();
+
+ // Require that the new offset is larger than the existing one. Otherwise, we
+ // can end up oscillating between two possible DAGs, for example,
+ // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
+ if (Offset <= uint64_t(GN->getOffset()))
+ return SDValue();
+
+ // Check whether folding this offset is legal. It must not go out of bounds of
+ // the referenced object to avoid violating the code model, and must be
+ // smaller than 2^21 because this is the largest offset expressible in all
+ // object formats.
+ //
+ // This check also prevents us from folding negative offsets, which will end
+ // up being treated in the same way as large positive ones. They could also
+ // cause code model violations, and aren't really common enough to matter.
+ if (Offset >= (1 << 21))
+ return SDValue();
+
+ const GlobalValue *GV = GN->getGlobal();
+ Type *T = GV->getValueType();
+ if (!T->isSized() ||
+ Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+ return SDValue();
+
+ SDLoc DL(GN);
+ SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
+ return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
+ DAG.getConstant(MinOffset, DL, MVT::i64));
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -10704,6 +10759,8 @@ SDValue AArch64TargetLowering::PerformDA
default:
break;
}
+ case ISD::GlobalAddress:
+ return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll Mon Apr 23 12:09:34 2018
@@ -5,32 +5,31 @@
; base + offset (imm9)
; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ldr xzr, [x0, #8]
; CHECK: ret
-define void @t1() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+define void @t1(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
; base + offset (> imm9)
; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
; CHECK: ret
-define void @t2() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+define void @t2(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ldr xzr, [x0, #32760]
; CHECK: ret
-define void @t3() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+define void @t3(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -38,10 +37,10 @@ define void @t3() {
; base + unsigned offset (> imm12 * size of type in bytes)
; CHECK: @t4
; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
+; CHECK: ldr xzr, [x0, x[[NUM]]]
; CHECK: ret
-define void @t4() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+define void @t4(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -58,12 +57,12 @@ define void @t5(i64 %a) {
; base + reg + imm
; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
; CHECK: ret
-define void @t6(i64 %a) {
- %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+define void @t6(i64 %a, i64* %object) {
+ %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
%incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll Mon Apr 23 12:09:34 2018
@@ -264,149 +264,196 @@ entry:
; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
- at str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
-define <1 x i64> @fct0() nounwind readonly ssp {
+define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <1 x i64>*
+ %0 = load <1 x i64>, <1 x i64>* %q, align 8
ret <1 x i64> %0
}
-define <2 x i32> @fct1() nounwind readonly ssp {
+define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i32>*
+ %0 = load <2 x i32>, <2 x i32>* %q, align 8
ret <2 x i32> %0
}
-define <4 x i16> @fct2() nounwind readonly ssp {
+define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i16>*
+ %0 = load <4 x i16>, <4 x i16>* %q, align 8
ret <4 x i16> %0
}
-define <8 x i8> @fct3() nounwind readonly ssp {
+define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i8>*
+ %0 = load <8 x i8>, <8 x i8>* %q, align 8
ret <8 x i8> %0
}
-define <2 x i64> @fct4() nounwind readonly ssp {
+define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i64>*
+ %0 = load <2 x i64>, <2 x i64>* %q, align 16
ret <2 x i64> %0
}
-define <4 x i32> @fct5() nounwind readonly ssp {
+define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i32>*
+ %0 = load <4 x i32>, <4 x i32>* %q, align 16
ret <4 x i32> %0
}
-define <8 x i16> @fct6() nounwind readonly ssp {
+define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i16>*
+ %0 = load <8 x i16>, <8 x i16>* %q, align 16
ret <8 x i16> %0
}
-define <16 x i8> @fct7() nounwind readonly ssp {
+define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <16 x i8>*
+ %0 = load <16 x i8>, <16 x i8>* %q, align 16
ret <16 x i8> %0
}
-define void @fct8() nounwind ssp {
+define void @fct8(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
- store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <1 x i64>*
+ %0 = load <1 x i64>, <1 x i64>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <1 x i64>*
+ store <1 x i64> %0, <1 x i64>* %q2, align 8
ret void
}
-define void @fct9() nounwind ssp {
+define void @fct9(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
- store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i32>*
+ %0 = load <2 x i32>, <2 x i32>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <2 x i32>*
+ store <2 x i32> %0, <2 x i32>* %q2, align 8
ret void
}
-define void @fct10() nounwind ssp {
+define void @fct10(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
- store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i16>*
+ %0 = load <4 x i16>, <4 x i16>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <4 x i16>*
+ store <4 x i16> %0, <4 x i16>* %q2, align 8
ret void
}
-define void @fct11() nounwind ssp {
+define void @fct11(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
- store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i8>*
+ %0 = load <8 x i8>, <8 x i8>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <8 x i8>*
+ store <8 x i8> %0, <8 x i8>* %q2, align 8
ret void
}
-define void @fct12() nounwind ssp {
+define void @fct12(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
- store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i64>*
+ %0 = load <2 x i64>, <2 x i64>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <2 x i64>*
+ store <2 x i64> %0, <2 x i64>* %q2, align 16
ret void
}
-define void @fct13() nounwind ssp {
+define void @fct13(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
- store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i32>*
+ %0 = load <4 x i32>, <4 x i32>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <4 x i32>*
+ store <4 x i32> %0, <4 x i32>* %q2, align 16
ret void
}
-define void @fct14() nounwind ssp {
+define void @fct14(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
- store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i16>*
+ %0 = load <8 x i16>, <8 x i16>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <8 x i16>*
+ store <8 x i16> %0, <8 x i16>* %q2, align 16
ret void
}
-define void @fct15() nounwind ssp {
+define void @fct15(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
- store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <16 x i8>*
+ %0 = load <16 x i8>, <16 x i8>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <16 x i8>*
+ store <16 x i8> %0, <16 x i8>* %q2, align 16
ret void
}
Added: llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll?rev=330630&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll Mon Apr 23 12:09:34 2018
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+ at x1 = external hidden global [2 x i64]
+ at x2 = external hidden global [16777216 x i64]
+ at x3 = external hidden global { [9 x i8*], [8 x i8*] }
+
+define i64 @f1() {
+ ; CHECK: f1:
+ ; CHECK: adrp x8, x1+16
+ ; CHECK: ldr x0, [x8, :lo12:x1+16]
+ %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
+ ret i64 %l
+}
+
+define i64 @f2() {
+ ; CHECK: f2:
+ ; CHECK: adrp x8, x1
+ ; CHECK: add x8, x8, :lo12:x1
+ ; CHECK: ldr x0, [x8, #24]
+ %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
+ ret i64 %l
+}
+
+define i64 @f3() {
+ ; CHECK: f3:
+ ; CHECK: adrp x8, x1+1
+ ; CHECK: add x8, x8, :lo12:x1+1
+ ; CHECK: ldr x0, [x8]
+ %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
+ ret i64 %l
+}
+
+define [2 x i64] @f4() {
+ ; CHECK: f4:
+ ; CHECK: adrp x8, x2+8
+ ; CHECK: add x8, x8, :lo12:x2+8
+ ; CHECK: ldp x0, x1, [x8]
+ %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
+ ret [2 x i64] %l
+}
+
+define i64 @f5() {
+ ; CHECK: f5:
+ ; CHECK: adrp x8, x2+2097144
+ ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
+ ; CHECK: ret
+ %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
+ ret i64 %l
+}
+
+define i64 @f6() {
+ ; CHECK: f6:
+ ; CHECK: adrp x8, x2
+ ; CHECK: add x8, x8, :lo12:x2
+ ; CHECK: orr w9, wzr, #0x200000
+ ; CHECK: ldr x0, [x8, x9]
+ ; CHECK: ret
+ %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
+ ret i64 %l
+}
+
+define i32 @f7() {
+entry:
+ ; CHECK: f7
+ ; CHECK: adrp x8, x3+108
+ ; CHECK: ldr w0, [x8, :lo12:x3+108]
+ %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
+ ret i32 %l
+}
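A note on the boundary cases above: @x2 holds 16777216 i64s, so element
262143 sits at byte offset 262143 * 8 = 2097144, just below the 2^21 =
2097152 limit checked in performGlobalAddressCombine, and the offset is
folded (f5); element 262144 sits at exactly 2097152, so it is not folded and
the offset is materialized separately (f6).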
Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll Mon Apr 23 12:09:34 2018
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a
;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x at PAGE
;CHECK-APPLE-IOS-NOT: adrp
;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x at PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y at PAGE
-;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y at PAGEOFF
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y at PAGE+12
+;CHECK-APPLE-IOS: str w1, [x9, __MergedGlobals_y at PAGEOFF+12]
%x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
%y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
store i32 %a1, i32* %x3, align 4
Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll Mon Apr 23 12:09:34 2018
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwi
; CHECK-LABEL: f3:
define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
+; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m3, align 4
store i32 %a2, i32* @n3, align 4
@@ -57,10 +57,9 @@ define void @f3(i32 %a1, i32 %a2) minsiz
; CHECK-LABEL: f4:
define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
; CHECK-NEXT: adrp x9, _n4 at PAGE
-; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
; CHECK-NEXT: str w1, [x9, _n4 at PAGEOFF]
; CHECK-NEXT: ret
store i32 %a1, i32* @m3, align 4
Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll Mon Apr 23 12:09:34 2018
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a
; CHECK-LABEL: f3:
define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #12]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
+; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m2, align 4
store i32 %a2, i32* @n2, align 4