[llvm] 0111da2 - [GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 2 12:31:38 PDT 2021
Author: Amara Emerson
Date: 2021-07-02T12:31:21-07:00
New Revision: 0111da2ef80adc516f13d7c7f4125eb9eea6c08f
URL: https://github.com/llvm/llvm-project/commit/0111da2ef80adc516f13d7c7f4125eb9eea6c08f
DIFF: https://github.com/llvm/llvm-project/commit/0111da2ef80adc516f13d7c7f4125eb9eea6c08f.diff
LOG: [GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.
We're trying to match a few pointer computation patterns here for
re-association opportunities.
1) Isolating a constant operand to be on the RHS, e.g.:
G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
2) Folding two constants in each sub-tree as long as such folding
doesn't break a legal addressing mode.
G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
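As a rough illustration, a minimal generic-MIR sketch of the two patterns (register names here are hypothetical and not taken from the patch):

  ; 1) constant isolated to the RHS:
  %sum:_(s64) = G_ADD %x, %c               ; %c is a G_CONSTANT
  %p:_(p0) = G_PTR_ADD %base, %sum(s64)
  ; becomes
  %new:_(p0) = G_PTR_ADD %base, %x(s64)
  %p:_(p0) = G_PTR_ADD %new, %c(s64)

  ; 2) two constant offsets folded, only if BASE + (C1+C2) is still a
  ;    legal addressing mode for the dependent load/store:
  %p1:_(p0) = G_PTR_ADD %base, %c1(s64)
  %p2:_(p0) = G_PTR_ADD %p1, %c2(s64)
  ; becomes
  %p2:_(p0) = G_PTR_ADD %base, %c12(s64)   ; %c12 = G_CONSTANT of C1+C2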
AArch64 code size improvements on CTMark with -Os:
Program              before    after     diff
pairlocalalign       251048    251044    -0.0%
consumer-typeset     421820    421812    -0.0%
kc                   431348    431320    -0.0%
SPASS                413404    413300    -0.0%
clamscan             384396    384220    -0.0%
tramp3d-v4           370640    370412    -0.1%
lencod               432096    431772    -0.1%
bullet               479400    478796    -0.1%
sqlite3              288504    288072    -0.1%
7zip-benchmark       573796    570768    -0.5%
Geomean difference                       -0.1%
Differential Revision: https://reviews.llvm.org/D105069
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2c8b6b14a67ec..6e62ef7dd7285 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -511,8 +511,14 @@ class CombinerHelper {
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
/// Use a function which takes in a MachineIRBuilder to perform a combine.
+ /// By default, it erases the instruction \p MI from the function.
bool applyBuildFn(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// Use a function which takes in a MachineIRBuilder to perform a combine.
+ /// This variant does not erase \p MI after calling the build function.
+ bool applyBuildFnNoErase(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
bool matchRotateOutOfRange(MachineInstr &MI);
@@ -528,6 +534,11 @@ class CombinerHelper {
bool matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// Reassociate pointer calculations with G_ADD involved, to allow better
+ /// addressing mode usage.
+ bool matchReassocPtrAdd(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
@@ -589,6 +600,11 @@ class CombinerHelper {
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit,
const unsigned MemSizeInBits);
+
+ /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
+ /// a re-association of its operands would break an existing legal addressing
+ /// mode that the address computation currently represents.
+ bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
};
} // namespace llvm
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a01d9b26f351c..678855bd4a11c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
bitfield_extract_from_and]>;
+def reassoc_ptradd : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_PTR_ADD):$root,
+ [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one]>;
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
- extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+ extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+ reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
known_bits_simplifications, ext_ext_fold,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index bb02c101fd048..9a71500357863 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1720,6 +1720,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
if (!MaybeImmVal)
return false;
+ // Don't do this combine if there are multiple uses of the first PTR_ADD,
+ // since we may be able to compute the second PTR_ADD as an immediate
+ // offset anyway. Folding the first offset into the second may cause us
+ // to go beyond the bounds of our legal addressing modes.
+ if (!MRI.hasOneNonDBGUse(Add2))
+ return false;
+
MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
@@ -3955,6 +3962,13 @@ bool CombinerHelper::applyBuildFn(
return true;
}
+bool CombinerHelper::applyBuildFnNoErase(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ return true;
+}
+
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4129,6 +4143,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+ MachineInstr &PtrAdd) {
+ assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+ Register Src1Reg = PtrAdd.getOperand(1).getReg();
+ MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+ if (!Src1Def)
+ return false;
+
+ Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+ if (MRI.hasOneNonDBGUse(Src1Reg))
+ return false;
+
+ auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ if (!C1)
+ return false;
+ auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ const APInt &C1APIntVal = *C1;
+ const APInt &C2APIntVal = *C2;
+ const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+ // This combine may end up running before ptrtoint/inttoptr combines
+ // manage to eliminate redundant conversions, so try to look through them.
+ MachineInstr *ConvUseMI = &UseMI;
+ unsigned ConvUseOpc = ConvUseMI->getOpcode();
+ while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+ ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+ Register DefReg = ConvUseMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DefReg))
+ break;
+ ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+ }
+ auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+ ConvUseOpc == TargetOpcode::G_STORE;
+ if (!LoadStore)
+ continue;
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ unsigned AS =
+ MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+ Type *AccessTy =
+ getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+ PtrAdd.getMF()->getFunction().getContext());
+ const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocPtrAdd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ // We're trying to match a few pointer computation patterns here for
+ // re-association opportunities.
+ // 1) Isolating a constant operand to be on the RHS, e.g.:
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ //
+ // 2) Folding two constants in each sub-tree as long as such folding
+ // doesn't break a legal addressing mode.
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+ MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+ if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+ // Try to match example 1).
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=,&MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ } else {
+ // Try to match example 2.
+ Register LHSSrc1 = LHS->getOperand(1).getReg();
+ Register LHSSrc2 = LHS->getOperand(2).getReg();
+ auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ }
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
new file mode 100644
index 0000000000000..1c5ed0eb39d21
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+name: test1_noreassoc_legal_already_new_is_illegal
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 4777
+ %4:_(s64) = G_CONSTANT i64 6
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test2_reassoc_already_legal_new_also_legal
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 10
+ %4:_(s64) = G_CONSTANT i64 6
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test3_noreassoc_only_oneuse
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %10:_(s64) = G_CONSTANT i64 4783
+ %9:_(p0) = G_PTR_ADD %0, %10(s64)
+ %7:_(s32) = G_LOAD %9(p0) :: (load 4)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test4_reassoc_existing_is_already_illegal
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %2:_(s64) = G_CONSTANT i64 17
+ %4:_(s64) = G_CONSTANT i64 4079
+ %9:_(s32) = G_CONSTANT i32 0
+ %10:_(p0) = G_PTR_ADD %0, %2(s64)
+ %11:_(p0) = G_PTR_ADD %10, %4(s64)
+ %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+ G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test5_add_on_rhs
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: test5_add_on_rhs
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = G_ADD %1, %2
+ %4:_(p0) = G_PTR_ADD %0, %3(s64)
+ %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...