[llvm] 61b4702 - [AArch64][GlobalISel] Fold constants into G_GLOBAL_VALUE
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 14:59:53 PST 2021
Author: Jessica Paquette
Date: 2021-02-12T14:55:15-08:00
New Revision: 61b4702a408834228c1c139b0e9af98616774db4
URL: https://github.com/llvm/llvm-project/commit/61b4702a408834228c1c139b0e9af98616774db4
DIFF: https://github.com/llvm/llvm-project/commit/61b4702a408834228c1c139b0e9af98616774db4.diff
LOG: [AArch64][GlobalISel] Fold constants into G_GLOBAL_VALUE
This pretty much just ports `performGlobalAddressCombine` from
AArch64ISelLowering. (AArch64 doesn't use the generic DAG combine for this.)
This adds a pre-legalize combine which looks for this pattern:
```
%g = G_GLOBAL_VALUE @x
%ptr1 = G_PTR_ADD %g, cst1
%ptr2 = G_PTR_ADD %g, cst2
...
%ptrN = G_PTR_ADD %g, cstN
```
And then, if possible, transforms it like so:
```
%g = G_GLOBAL_VALUE @x + min(cst)
%offset_g = G_PTR_ADD %g, -min(cst)
%ptr1 = G_PTR_ADD %offset_g, cst1
%ptr2 = G_PTR_ADD %offset_g, cst2
...
%ptrN = G_PTR_ADD %offset_g, cstN
```
Where min(cst) is the smallest out of the G_PTR_ADD constants.
This means we should save at least one G_PTR_ADD.
This also updates code in the legalizer + selector which assumes that
G_GLOBAL_VALUE will never have an offset and adds/updates relevant tests.
Differential Revision: https://reviews.llvm.org/D96624
Added:
llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir
llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir
Modified:
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
llvm/test/CodeGen/AArch64/fold-global-offsets.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 980d61dde1d7..d963b01afd42 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -24,10 +24,20 @@ def icmp_redundant_trunc : GICombineRule<
[{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
(apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+// AArch64-specific offset folding for G_GLOBAL_VALUE. The match data is the (new global offset, smallest G_PTR_ADD constant) pair produced by the matcher.
+def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
+def fold_global_offset : GICombineRule<
+ (defs root:$root, fold_global_offset_matchdata:$matchinfo),
+ (match (wip_match_opcode G_GLOBAL_VALUE):$root,
+ [{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
+>;
+
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
fconstant_to_constant,
- icmp_redundant_trunc]> {
+ icmp_redundant_trunc,
+ fold_global_offset]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bf44fa73e53d..30a397606181 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5628,8 +5628,10 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
return None;
// TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
- // TODO: Need to check GV's offset % size if doing offset folding into globals.
- assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+ auto Offset = Adrp.getOperand(1).getOffset();
+ if (Offset % Size != 0)
+ return None;
+
auto GV = Adrp.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return None;
@@ -5643,7 +5645,7 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
Register AdrpReg = Adrp.getOperand(0).getReg();
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
[=](MachineInstrBuilder &MIB) {
- MIB.addGlobalAddress(GV, /* Offset */ 0,
+ MIB.addGlobalAddress(GV, Offset,
OpFlags | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC);
}}};
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 09de46a6f18f..06fec9461321 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -781,7 +781,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// G_ADD_LOW instructions.
// By splitting this here, we can optimize accesses in the small code model by
// folding in the G_ADD_LOW into the load/store offset.
- auto GV = MI.getOperand(1).getGlobal();
+ auto &GlobalOp = MI.getOperand(1);
+ const auto* GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
@@ -791,9 +792,10 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
if (OpFlags & AArch64II::MO_GOT)
return true;
+ auto Offset = GlobalOp.getOffset();
Register DstReg = MI.getOperand(0).getReg();
auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
- .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+ .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
// Set the regclass on the dest reg too.
MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
@@ -811,6 +813,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// binary must also be loaded into address range [0, 2^48). Both of these
// properties need to be ensured at runtime when using tagged addresses.
if (OpFlags & AArch64II::MO_TAGGED) {
+ assert(!Offset &&
+ "Should not have folded in an offset for a tagged global!");
ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
.addGlobalAddress(GV, 0x100000000,
AArch64II::MO_PREL | AArch64II::MO_G3)
@@ -819,7 +823,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
}
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
- .addGlobalAddress(GV, 0,
+ .addGlobalAddress(GV, Offset,
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
MI.eraseFromParent();
return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 6e7fe7c98512..26029b4db11f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -107,6 +107,116 @@ static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
+/// On success, \p MatchInfo holds (new global offset, smallest G_PTR_ADD cst).
+/// e.g.
+///
+/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
+static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::pair<uint64_t, uint64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+ MachineFunction &MF = *MI.getMF();
+ auto &GlobalOp = MI.getOperand(1);
+ auto *GV = GlobalOp.getGlobal();
+
+ // Only fold references classified as MO_NO_FLAG; don't allow anything that could represent offsets etc.
+ if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
+ GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
+ return false;
+
+ // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
+ //
+ // %g = G_GLOBAL_VALUE @x
+ // %ptr1 = G_PTR_ADD %g, cst1
+ // %ptr2 = G_PTR_ADD %g, cst2
+ // ...
+ // %ptrN = G_PTR_ADD %g, cstN
+ //
+ // Identify the *smallest* constant. We want to be able to form this:
+ //
+ // %offset_g = G_GLOBAL_VALUE @x + min_cst
+ // %g = G_PTR_ADD %offset_g, -min_cst
+ // %ptr1 = G_PTR_ADD %g, cst1
+ // ...
+ Register Dst = MI.getOperand(0).getReg();
+ uint64_t MinOffset = -1ull; // Largest uint64_t: no constant seen yet.
+ for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
+ if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+ auto Cst =
+ getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
+ if (!Cst)
+ return false;
+ MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
+ }
+
+ // Require that the new offset is larger than the existing one to avoid
+ // infinite loops. This also rejects a zero minimum and unsigned wrap-around.
+ uint64_t CurrOffset = GlobalOp.getOffset();
+ uint64_t NewOffset = MinOffset + CurrOffset;
+ if (NewOffset <= CurrOffset)
+ return false;
+
+ // Check whether folding this offset is legal. It must not go out of bounds of
+ // the referenced object to avoid violating the code model, and must be
+ // smaller than 2^21 because this is the largest offset expressible in all
+ // object formats.
+ //
+ // This check also prevents us from folding negative offsets, which will end
+ // up being treated in the same way as large positive ones. They could also
+ // cause code model violations, and aren't really common enough to matter.
+ if (NewOffset >= (1 << 21))
+ return false;
+
+ Type *T = GV->getValueType();
+ if (!T->isSized() ||
+ NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+ return false;
+ MatchInfo = std::make_pair(NewOffset, MinOffset);
+ return true;
+}
+
+static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer,
+ std::pair<uint64_t, uint64_t> &MatchInfo) {
+ // Fold the offset recorded in MatchInfo into the G_GLOBAL_VALUE. Change:
+ //
+ // %g = G_GLOBAL_VALUE @x
+ // %ptr1 = G_PTR_ADD %g, cst1
+ // %ptr2 = G_PTR_ADD %g, cst2
+ // ...
+ // %ptrN = G_PTR_ADD %g, cstN
+ //
+ // To:
+ //
+ // %offset_g = G_GLOBAL_VALUE @x + min_cst
+ // %g = G_PTR_ADD %offset_g, -min_cst
+ // %ptr1 = G_PTR_ADD %g, cst1
+ // ...
+ // %ptrN = G_PTR_ADD %g, cstN
+ //
+ // Then, the original G_PTR_ADDs should be folded later on so that they look
+ // like this:
+ //
+ // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
+ uint64_t Offset, MinOffset;
+ std::tie(Offset, MinOffset) = MatchInfo; // (new global offset, smallest cst)
+ B.setInstrAndDebugLoc(MI);
+ Observer.changingInstr(MI);
+ auto &GlobalOp = MI.getOperand(1);
+ auto *GV = GlobalOp.getGlobal();
+ GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
+ Register Dst = MI.getOperand(0).getReg();
+ Register NewGVDst = MRI.cloneVirtualRegister(Dst); // New def for the offset global.
+ MI.getOperand(0).setReg(NewGVDst);
+ Observer.changedInstr(MI);
+ B.buildPtrAdd(
+ Dst, NewGVDst, // Re-define the old Dst so its existing uses need no rewrite.
+ B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
+ return true;
+}
+
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir
new file mode 100644
index 000000000000..639c51d92d9c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets-target-features.mir
@@ -0,0 +1,241 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=DEFAULT,CHECK
+# RUN: llc -mtriple aarch64-apple-darwin -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-MACHO,CHECK
+# RUN: llc -mtriple aarch64-apple-darwin -code-model=small -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=SMALL-MACHO,CHECK
+# RUN: llc -mtriple aarch64-linux-elf -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-ELF,CHECK
+# RUN: llc -mtriple aarch64-linux-elf -code-model=tiny -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=TINY,CHECK
+# RUN: llc -mtriple aarch64-windows-coff -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
+
+# Each of these tests has a trivial pattern for folding a G_PTR_ADD into a
+# G_GLOBAL_VALUE.
+#
+# Check that given different code models/target features, we do/don't fold.
+
+--- |
+ @external_linkage = external hidden global i32
+ @common_linkage = common local_unnamed_addr global i32 0, align 4
+ @internal_linkage = internal unnamed_addr global i32 0, align 4
+ @extern_weak_linkage = extern_weak hidden global i32
+ @dll_import = external dllimport global i32
+
+ define void @test_external_linkage() { ret void }
+ define void @test_internal_linkage() { ret void }
+ define void @test_common_linkage() { ret void }
+ define void @test_extern_weak_linkage() { ret void }
+ define void @never_fold_tagged_globals() #0 { ret void }
+ define void @test_dll_import() { ret void }
+
+ attributes #0 = { "target-features"="+tagged-globals" }
+...
+---
+name: test_external_linkage
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Large + Mach-O goes via GOT, so we can't fold.
+
+ ; DEFAULT-LABEL: name: test_external_linkage
+ ; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ ; DEFAULT: $x0 = COPY [[GV]](p0)
+ ; DEFAULT: RET_ReallyLR implicit $x0
+ ; LARGE-MACHO-LABEL: name: test_external_linkage
+ ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @external_linkage
+ ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
+ ; LARGE-MACHO: RET_ReallyLR implicit $x0
+ ; SMALL-MACHO-LABEL: name: test_external_linkage
+ ; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ ; SMALL-MACHO: $x0 = COPY [[GV]](p0)
+ ; SMALL-MACHO: RET_ReallyLR implicit $x0
+ ; LARGE-ELF-LABEL: name: test_external_linkage
+ ; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ ; LARGE-ELF: $x0 = COPY [[GV]](p0)
+ ; LARGE-ELF: RET_ReallyLR implicit $x0
+ ; TINY-LABEL: name: test_external_linkage
+ ; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ ; TINY: $x0 = COPY [[GV]](p0)
+ ; TINY: RET_ReallyLR implicit $x0
+ ; WINDOWS-LABEL: name: test_external_linkage
+ ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ ; WINDOWS: $x0 = COPY [[GV]](p0)
+ ; WINDOWS: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @external_linkage
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: test_internal_linkage
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Large + Mach-O goes via GOT, so we can't fold.
+
+ ; DEFAULT-LABEL: name: test_internal_linkage
+ ; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
+ ; DEFAULT: $x0 = COPY [[GV]](p0)
+ ; DEFAULT: RET_ReallyLR implicit $x0
+ ; LARGE-MACHO-LABEL: name: test_internal_linkage
+ ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @internal_linkage
+ ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
+ ; LARGE-MACHO: RET_ReallyLR implicit $x0
+ ; SMALL-MACHO-LABEL: name: test_internal_linkage
+ ; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
+ ; SMALL-MACHO: $x0 = COPY [[GV]](p0)
+ ; SMALL-MACHO: RET_ReallyLR implicit $x0
+ ; LARGE-ELF-LABEL: name: test_internal_linkage
+ ; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
+ ; LARGE-ELF: $x0 = COPY [[GV]](p0)
+ ; LARGE-ELF: RET_ReallyLR implicit $x0
+ ; TINY-LABEL: name: test_internal_linkage
+ ; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
+ ; TINY: $x0 = COPY [[GV]](p0)
+ ; TINY: RET_ReallyLR implicit $x0
+ ; WINDOWS-LABEL: name: test_internal_linkage
+ ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
+ ; WINDOWS: $x0 = COPY [[GV]](p0)
+ ; WINDOWS: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @internal_linkage
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: test_common_linkage
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; DEFAULT-LABEL: name: test_common_linkage
+ ; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ ; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1
+ ; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; DEFAULT: $x0 = COPY %ptr_add(p0)
+ ; DEFAULT: RET_ReallyLR implicit $x0
+ ; LARGE-MACHO-LABEL: name: test_common_linkage
+ ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
+ ; LARGE-MACHO: RET_ReallyLR implicit $x0
+ ; SMALL-MACHO-LABEL: name: test_common_linkage
+ ; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ ; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; SMALL-MACHO: $x0 = COPY %ptr_add(p0)
+ ; SMALL-MACHO: RET_ReallyLR implicit $x0
+ ; LARGE-ELF-LABEL: name: test_common_linkage
+ ; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ ; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-ELF: $x0 = COPY %ptr_add(p0)
+ ; LARGE-ELF: RET_ReallyLR implicit $x0
+ ; TINY-LABEL: name: test_common_linkage
+ ; TINY: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ ; TINY: %imm:_(s64) = G_CONSTANT i64 1
+ ; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; TINY: $x0 = COPY %ptr_add(p0)
+ ; TINY: RET_ReallyLR implicit $x0
+ ; WINDOWS-LABEL: name: test_common_linkage
+ ; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @common_linkage + 1
+ ; WINDOWS: $x0 = COPY [[GV]](p0)
+ ; WINDOWS: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @common_linkage
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: test_extern_weak_linkage
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; DEFAULT-LABEL: name: test_extern_weak_linkage
+ ; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1
+ ; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; DEFAULT: $x0 = COPY %ptr_add(p0)
+ ; DEFAULT: RET_ReallyLR implicit $x0
+ ; LARGE-MACHO-LABEL: name: test_extern_weak_linkage
+ ; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
+ ; LARGE-MACHO: RET_ReallyLR implicit $x0
+ ; SMALL-MACHO-LABEL: name: test_extern_weak_linkage
+ ; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1
+ ; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; SMALL-MACHO: $x0 = COPY %ptr_add(p0)
+ ; SMALL-MACHO: RET_ReallyLR implicit $x0
+ ; LARGE-ELF-LABEL: name: test_extern_weak_linkage
+ ; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1
+ ; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; LARGE-ELF: $x0 = COPY %ptr_add(p0)
+ ; LARGE-ELF: RET_ReallyLR implicit $x0
+ ; TINY-LABEL: name: test_extern_weak_linkage
+ ; TINY: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; TINY: %imm:_(s64) = G_CONSTANT i64 1
+ ; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; TINY: $x0 = COPY %ptr_add(p0)
+ ; TINY: RET_ReallyLR implicit $x0
+ ; WINDOWS-LABEL: name: test_extern_weak_linkage
+ ; WINDOWS: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ ; WINDOWS: %imm:_(s64) = G_CONSTANT i64 1
+ ; WINDOWS: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ ; WINDOWS: $x0 = COPY %ptr_add(p0)
+ ; WINDOWS: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: never_fold_tagged_globals
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: never_fold_tagged_globals
+ ; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
+ %global:_(p0) = G_GLOBAL_VALUE @external_linkage
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: test_dll_import
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_dll_import
+ ; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @dll_import + 1
+ %global:_(p0) = G_GLOBAL_VALUE @dll_import
+ %imm:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir
new file mode 100644
index 000000000000..514cef0e703d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir
@@ -0,0 +1,284 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-darwin -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ @g = external hidden global i32
+
+ %opaque = type opaque
+ @unsized = external hidden global %opaque
+
+ define void @one_ptr_add() { ret void }
+ define void @add_to_offset() { ret void }
+ define void @two_ptr_adds_same_offset() { ret void }
+ define void @two_ptr_adds_different_offset() { ret void }
+ define void @ptr_add_chain() { ret void }
+
+ define void @dont_fold_negative_offset() { ret void }
+ define void @dont_min_offset_less_than_curr_offset() { ret void }
+ define void @dont_fold_max_offset() { ret void }
+ define void @dont_fold_offset_larger_than_type_alloc() { ret void }
+ define void @dont_fold_unsized_type() { ret void }
+...
+---
+name: one_ptr_add
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; We should fold the offset 1 into the G_GLOBAL_VALUE.
+
+ ; CHECK-LABEL: name: one_ptr_add
+ ; CHECK: liveins: $x0
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
+ ; CHECK: $x0 = COPY [[GV]](p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: add_to_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; We should fold the offset 1 into the G_GLOBAL_VALUE, resulting in a
+ ; final offset of 4.
+
+ ; CHECK-LABEL: name: add_to_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 4
+ ; CHECK: $x0 = COPY [[GV]](p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g + 3
+ %offset:_(s64) = G_CONSTANT i64 1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: two_ptr_adds_same_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; We're allowed to have more than one G_PTR_ADD use. We should fold 1 into
+ ; the G_GLOBAL_VALUE's offset.
+
+ ; CHECK-LABEL: name: two_ptr_adds_same_offset
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %val1:_(s64) = COPY $x0
+ ; CHECK: %val2:_(s64) = COPY $x1
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
+ ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8)
+ ; CHECK: G_STORE %val2(s64), [[GV]](p0) :: (store 8)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %val1:_(s64) = COPY $x0
+ %val2:_(s64) = COPY $x1
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 1
+ %ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64)
+ %ptr_add2:_(p0) = G_PTR_ADD %global, %offset(s64)
+ G_STORE %val1:_(s64), %ptr_add1 :: (store 8)
+ G_STORE %val2:_(s64), %ptr_add2 :: (store 8)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: two_ptr_adds_different_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; The lowest offset G_PTR_ADD (2) should be folded into the G_GLOBAL_VALUE.
+ ;
+ ; The other G_PTR_ADD should have its offset decremented by 2.
+
+ ; CHECK-LABEL: name: two_ptr_adds_different_offset
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %val1:_(s64) = COPY $x0
+ ; CHECK: %val2:_(s64) = COPY $x1
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 2
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK: %ptr_add2:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
+ ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8)
+ ; CHECK: G_STORE %val2(s64), %ptr_add2(p0) :: (store 8)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %val1:_(s64) = COPY $x0
+ %val2:_(s64) = COPY $x1
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset1:_(s64) = G_CONSTANT i64 2
+ %offset2:_(s64) = G_CONSTANT i64 10
+ %ptr_add1:_(p0) = G_PTR_ADD %global, %offset1(s64)
+ %ptr_add2:_(p0) = G_PTR_ADD %global, %offset2(s64)
+ G_STORE %val1:_(s64), %ptr_add1 :: (store 8)
+ G_STORE %val2:_(s64), %ptr_add2 :: (store 8)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: ptr_add_chain
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+ ; We should be able to fold all of the G_PTR_ADDs, except for the last one
+ ; into the G_GLOBAL_VALUE.
+ ;
+ ; (TypeAllocSize = 4, so the offset on the G_GLOBAL_VALUE can't go above
+ ; that.)
+
+ ; CHECK-LABEL: name: ptr_add_chain
+ ; CHECK: liveins: $x0
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: %dont_fold_me:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
+ ; CHECK: $x0 = COPY %dont_fold_me(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 1
+ %ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64)
+ %ptr_add2:_(p0) = G_PTR_ADD %ptr_add1, %offset(s64)
+ %ptr_add3:_(p0) = G_PTR_ADD %ptr_add2, %offset(s64)
+ %ptr_add4:_(p0) = G_PTR_ADD %ptr_add3, %offset(s64)
+ %dont_fold_me:_(p0) = G_PTR_ADD %ptr_add4, %offset(s64)
+ $x0 = COPY %dont_fold_me
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_negative_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; Do not add negative offsets to G_GLOBAL_VALUE.
+
+ ; CHECK-LABEL: name: dont_fold_negative_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+ ; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
+ ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ ; CHECK: $x0 = COPY %ptr_add(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 -1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_min_offset_less_than_curr_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; Do not create smaller offsets. Ensures combine termination.
+
+ ; CHECK-LABEL: name: dont_min_offset_less_than_curr_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g + 3
+ ; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
+ ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ ; CHECK: $x0 = COPY %ptr_add(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g + 3
+ %offset:_(s64) = G_CONSTANT i64 -1
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_max_offset
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; 1 << 21 is the largest offset expressible in all object formats.
+ ; Don't fold it.
+
+ ; CHECK-LABEL: name: dont_fold_max_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+ ; CHECK: %offset:_(s64) = G_CONSTANT i64 4292870144
+ ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ ; CHECK: $x0 = COPY %ptr_add(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 4292870144 ; (1 << 32) - (1 << 21), i.e. >= 1 << 21
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_offset_larger_than_type_alloc
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+
+ ; Type alloc size = 4, offset = 16. Don't fold.
+
+ ; CHECK-LABEL: name: dont_fold_offset_larger_than_type_alloc
+ ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
+ ; CHECK: %offset:_(s64) = G_CONSTANT i64 16
+ ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ ; CHECK: $x0 = COPY %ptr_add(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @g
+ %offset:_(s64) = G_CONSTANT i64 16
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_unsized_type
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Check that we don't touch unsized globals.
+
+ ; CHECK-LABEL: name: dont_fold_unsized_type
+ ; CHECK: %global:_(p0) = G_GLOBAL_VALUE @unsized
+ ; CHECK: %offset:_(s64) = G_CONSTANT i64 16
+ ; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ ; CHECK: $x0 = COPY %ptr_add(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %global:_(p0) = G_GLOBAL_VALUE @unsized
+ %offset:_(s64) = G_CONSTANT i64 16
+ %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
+ $x0 = COPY %ptr_add(p0)
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir
index 3fbd0125b31f..706bab2d0092 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global-pic.mir
@@ -6,6 +6,7 @@
target triple = "aarch64--"
@var = external global i8
define i8* @test_global() { ret i8* undef }
+ define i8* @test_global_with_offset() { ret i8* undef }
...
---
name: test_global
@@ -17,15 +18,6 @@ body: |
; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code
; model isn't 'Small'.
- ; CHECK-LABEL: name: test_global
- ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
- ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
- ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
- ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
- ; CMLARGE-LABEL: name: test_global
- ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
- ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
- ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64)
; PIC-LABEL: name: test_global
; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
@@ -34,3 +26,17 @@ body: |
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...
+---
+name: test_global_with_offset
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0:
+ ; PIC-LABEL: name: test_global_with_offset
+ ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1
+ ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
+ ; PIC: $x0 = COPY [[PTRTOINT]](s64)
+ %0(p0) = G_GLOBAL_VALUE @var + 1
+ %1:_(s64) = G_PTRTOINT %0
+ $x0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir
index da84fb43ca93..4338db9df94a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-global.mir
@@ -7,6 +7,7 @@
target triple = "aarch64--"
@var = external dso_local global i8
define i8* @test_global() { ret i8* undef }
+ define i8* @test_global_with_offset() { ret i8* undef }
...
---
name: test_global
@@ -17,16 +18,11 @@ body: |
; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code
; model isn't 'Small'.
-
; CHECK-LABEL: name: test_global
; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
- ; PIC-LABEL: name: test_global
- ; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
- ; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
- ; PIC: $x0 = COPY [[PTRTOINT]](s64)
; CMLARGE-LABEL: name: test_global
; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
@@ -35,3 +31,23 @@ body: |
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...
+---
+name: test_global_with_offset
+body: |
+ bb.0:
+ ; When we legalize into ADRP + G_ADD_LOW, both should inherit the offset
+ ; from the original G_GLOBAL_VALUE.
+ ;
+ ; CHECK-LABEL: name: test_global_with_offset
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var + 1
+ ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var + 1
+ ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
+ ; CHECK: $x0 = COPY [[PTRTOINT]](s64)
+ ; CMLARGE-LABEL: name: test_global_with_offset
+ ; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1
+ ; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
+ ; CMLARGE: $x0 = COPY [[PTRTOINT]](s64)
+ %0:_(p0) = G_GLOBAL_VALUE @var + 1
+ %1:_(s64) = G_PTRTOINT %0
+ $x0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir
new file mode 100644
index 000000000000..2272aaf28673
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-low.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+
+ @x = external hidden local_unnamed_addr global i32*, align 8
+
+ define void @select_add_low_without_offset() { ret void }
+ define void @select_add_low_with_offset() { ret void }
+ define void @select_add_low_without_adrp() { ret void }
+
+...
+---
+name: select_add_low_without_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: select_add_low_without_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x, target-flags(aarch64-pageoff, aarch64-nc) @x
+ ; CHECK: $x0 = COPY %add_low
+ ; CHECK: RET_ReallyLR implicit $x0
+ %copy:gpr(p0) = COPY $x0
+ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
+ %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+ $x0 = COPY %add_low
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: select_add_low_with_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: select_add_low_with_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x + 1, target-flags(aarch64-pageoff, aarch64-nc) @x + 1
+ ; CHECK: $x0 = COPY %add_low
+ ; CHECK: RET_ReallyLR implicit $x0
+ %copy:gpr(p0) = COPY $x0
+ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 1
+ %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 1
+ $x0 = COPY %add_low
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: select_add_low_without_adrp
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: select_add_low_without_adrp
+ ; CHECK: liveins: $x0
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: %add_low:gpr64sp = ADDXri %ptr, target-flags(aarch64-pageoff, aarch64-nc) @x, 0
+ ; CHECK: $x0 = COPY %add_low
+ ; CHECK: RET_ReallyLR implicit $x0
+ %ptr:gpr(p0) = COPY $x0
+ %add_low:gpr(p0) = G_ADD_LOW %ptr(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+ $x0 = COPY %add_low
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir
new file mode 100644
index 000000000000..7533731b2bd8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-with-offset.mir
@@ -0,0 +1,38 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=LARGE
+# RUN: llc -mtriple=aarch64 -code-model=small -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=SMALL
+# RUN: llc -mtriple=aarch64 -code-model=tiny -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=TINY
+
+--- |
+ @g = external hidden global i32
+ define void @select_gv_with_offset() { ret void }
+...
+---
+name: select_gv_with_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; LARGE-LABEL: name: select_gv_with_offset
+ ; LARGE: liveins: $x0
+ ; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @g + 1, 0
+ ; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @g + 1, 16
+ ; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @g + 1, 32
+ ; LARGE: %g:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @g + 1, 48
+ ; LARGE: $x0 = COPY %g
+ ; LARGE: RET_ReallyLR implicit $x0
+ ; SMALL-LABEL: name: select_gv_with_offset
+ ; SMALL: liveins: $x0
+ ; SMALL: %g:gpr64 = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1
+ ; SMALL: $x0 = COPY %g
+ ; SMALL: RET_ReallyLR implicit $x0
+ ; TINY-LABEL: name: select_gv_with_offset
+ ; TINY: liveins: $x0
+ ; TINY: %g:gpr64 = ADR @g + 1
+ ; TINY: $x0 = COPY %g
+ ; TINY: RET_ReallyLR implicit $x0
+ %g:gpr(p0) = G_GLOBAL_VALUE @g + 1
+ $x0 = COPY %g(p0)
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index 62c28b906dea..5bbd2a73c14e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -41,7 +41,8 @@
@x = external hidden local_unnamed_addr global i32*, align 8
define void @store_adrp_add_low() { ret void }
-
+ define void @store_adrp_add_low_foldable_offset() { ret void }
+ define void @store_adrp_add_low_unfoldable_offset() { ret void }
...
---
@@ -622,3 +623,43 @@ body: |
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
+
+...
+---
+name: store_adrp_add_low_foldable_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_adrp_add_low_foldable_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64all = COPY $x0
+ ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x + 8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy
+ ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x + 8 :: (store 8 into @x)
+ %copy:gpr(p0) = COPY $x0
+ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 8
+ %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 8
+ G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
+
+...
+---
+name: store_adrp_add_low_unfoldable_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_adrp_add_low_unfoldable_offset
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64all = COPY $x0
+ ; CHECK: %add_low:gpr64common = MOVaddr target-flags(aarch64-page) @x + 3, target-flags(aarch64-pageoff, aarch64-nc) @x + 3
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy
+ ; CHECK: STRXui [[COPY]], %add_low, 0 :: (store 8 into @x)
+ %copy:gpr(p0) = COPY $x0
+ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
+ %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
+ G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
index 40235791c524..24168f912175 100644
--- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
@@ -1,69 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc < %s -global-isel -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=GISEL
@x1 = external hidden global [2 x i64]
@x2 = external hidden global [16777216 x i64]
@x3 = external hidden global { [9 x i8*], [8 x i8*] }
define i64 @f1() {
- ; CHECK: f1:
- ; CHECK: adrp x8, x1+16
- ; CHECK: ldr x0, [x8, :lo12:x1+16]
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x1+16
+; CHECK-NEXT: ldr x0, [x8, :lo12:x1+16]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f1:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, x1+16
+; GISEL-NEXT: ldr x0, [x8, :lo12:x1+16]
+; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
ret i64 %l
}
define i64 @f2() {
- ; CHECK: f2:
- ; CHECK: adrp x8, x1
- ; CHECK: add x8, x8, :lo12:x1
- ; CHECK: ldr x0, [x8, #24]
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x1
+; CHECK-NEXT: add x8, x8, :lo12:x1
+; CHECK-NEXT: ldr x0, [x8, #24]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f2:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, x1
+; GISEL-NEXT: add x8, x8, :lo12:x1
+; GISEL-NEXT: ldr x0, [x8, #24]
+; GISEL-NEXT: ret
+
%l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
ret i64 %l
}
define i64 @f3() {
- ; CHECK: f3:
- ; CHECK: adrp x8, x1+1
- ; CHECK: add x8, x8, :lo12:x1+1
- ; CHECK: ldr x0, [x8]
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x1+1
+; CHECK-NEXT: add x8, x8, :lo12:x1+1
+; CHECK-NEXT: ldr x0, [x8]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f3:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, x1+1
+; GISEL-NEXT: add x8, x8, :lo12:x1+1
+; GISEL-NEXT: ldr x0, [x8]
+; GISEL-NEXT: ret
%l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
ret i64 %l
}
define [2 x i64] @f4() {
- ; CHECK: f4:
- ; CHECK: adrp x8, x2+8
- ; CHECK: add x8, x8, :lo12:x2+8
- ; CHECK: ldp x0, x1, [x8]
+; FIXME: GlobalISel misses the opportunity to form a LDP here.
+;
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x2+8
+; CHECK-NEXT: add x8, x8, :lo12:x2+8
+; CHECK-NEXT: ldp x0, x1, [x8]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f4:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x9, x2+8
+; GISEL-NEXT: adrp x8, x2+8
+; GISEL-NEXT: add x9, x9, :lo12:x2+8
+; GISEL-NEXT: ldr x0, [x8, :lo12:x2+8]
+; GISEL-NEXT: ldr x1, [x9, #8]
+; GISEL-NEXT: ret
%l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
ret [2 x i64] %l
}
define i64 @f5() {
- ; CHECK: f5:
- ; CHECK: adrp x8, x2+2097144
- ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
- ; CHECK: ret
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x2+2097144
+; CHECK-NEXT: ldr x0, [x8, :lo12:x2+2097144]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f5:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, x2+2097144
+; GISEL-NEXT: ldr x0, [x8, :lo12:x2+2097144]
+; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
ret i64 %l
}
define i64 @f6() {
- ; CHECK: f6:
- ; CHECK: adrp x8, x2
- ; CHECK: add x8, x8, :lo12:x2
- ; CHECK: mov w9, #2097152
- ; CHECK: ldr x0, [x8, x9]
- ; CHECK: ret
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, x2
+; CHECK-NEXT: add x8, x8, :lo12:x2
+; CHECK-NEXT: mov w9, #2097152
+; CHECK-NEXT: ldr x0, [x8, x9]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f6:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x9, x2
+; GISEL-NEXT: mov w8, #2097152
+; GISEL-NEXT: add x9, x9, :lo12:x2
+; GISEL-NEXT: ldr x0, [x9, x8]
+; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
ret i64 %l
}
define i32 @f7() {
+; FIXME: GlobalISel doesn't handle vectors well.
+;
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, x3+108
+; CHECK-NEXT: ldr w0, [x8, :lo12:x3+108]
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: f7:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: adrp x8, x3+88
+; GISEL-NEXT: add x8, x8, :lo12:x3+88
+; GISEL-NEXT: mov v0.d[1], x8
+; GISEL-NEXT: mov w9, #64
+; GISEL-NEXT: mov d1, v0.d[1]
+; GISEL-NEXT: sub x8, x9, #64 // =64
+; GISEL-NEXT: fmov x11, d1
+; GISEL-NEXT: fmov x10, d0
+; GISEL-NEXT: lsl x12, x11, x8
+; GISEL-NEXT: cmp x9, #64 // =64
+; GISEL-NEXT: lsr x8, x11, x8
+; GISEL-NEXT: orr x11, x12, x10, lsr #0
+; GISEL-NEXT: csel x8, x11, x8, lo
+; GISEL-NEXT: cmp x9, #0 // =0
+; GISEL-NEXT: csel x8, x10, x8, eq
+; GISEL-NEXT: ldr w0, [x8, #20]
+; GISEL-NEXT: ret
+
entry:
- ; CHECK: f7
- ; CHECK: adrp x8, x3+108
- ; CHECK: ldr w0, [x8, :lo12:x3+108]
%l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
ret i32 %l
}
More information about the llvm-commits
mailing list