[llvm] 0111da2 - [GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 2 12:31:38 PDT 2021


Author: Amara Emerson
Date: 2021-07-02T12:31:21-07:00
New Revision: 0111da2ef80adc516f13d7c7f4125eb9eea6c08f

URL: https://github.com/llvm/llvm-project/commit/0111da2ef80adc516f13d7c7f4125eb9eea6c08f
DIFF: https://github.com/llvm/llvm-project/commit/0111da2ef80adc516f13d7c7f4125eb9eea6c08f.diff

LOG: [GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.

We're trying to match a few pointer computation patterns here for
re-association opportunities.
1) Isolating a constant operand to be on the RHS, e.g.:
   G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)

2) Folding the two constants, one from each sub-tree, as long as such
   folding doesn't break a legal addressing mode.
   G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
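
As a rough illustration (not code from this patch), the two rewrites build the
following shapes through MachineIRBuilder; Base, X, C, C1, C2, PtrTy and OffTy
are placeholder names:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    // 1) G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
    static Register reassocAddOnRHS(MachineIRBuilder &B, LLT PtrTy, LLT OffTy,
                                    Register Base, Register X, int64_t C) {
      auto Inner = B.buildPtrAdd(PtrTy, Base, X); // variable part first
      auto Cst = B.buildConstant(OffTy, C);       // constant isolated on RHS
      return B.buildPtrAdd(PtrTy, Inner, Cst).getReg(0);
    }

    // 2) G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1 + C2)
    static Register foldOffsets(MachineIRBuilder &B, LLT PtrTy, LLT OffTy,
                                Register Base, int64_t C1, int64_t C2) {
      auto Sum = B.buildConstant(OffTy, C1 + C2);
      return B.buildPtrAdd(PtrTy, Base, Sum).getReg(0);
    }

Isolating the constant on the outermost RHS (pattern 1) is what lets a later
fold or the instruction selector absorb it as an immediate offset; pattern 2
only fires when the folded offset still forms a legal addressing mode for
every user.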

AArch64 code size improvements on CTMark with -Os:
Program              before  after   diff
 pairlocalalign      251048  251044 -0.0%
 consumer-typeset    421820  421812 -0.0%
 kc                  431348  431320 -0.0%
 SPASS               413404  413300 -0.0%
 clamscan            384396  384220 -0.0%
 tramp3d-v4          370640  370412 -0.1%
 lencod              432096  431772 -0.1%
 bullet              479400  478796 -0.1%
 sqlite3             288504  288072 -0.1%
 7zip-benchmark      573796  570768 -0.5%
 Geomean difference                 -0.1%

Differential Revision: https://reviews.llvm.org/D105069

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2c8b6b14a67ec..6e62ef7dd7285 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -511,8 +511,14 @@ class CombinerHelper {
       SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
 
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// By default, it erases the instruction \p MI from the function.
   bool applyBuildFn(MachineInstr &MI,
                     std::function<void(MachineIRBuilder &)> &MatchInfo);
+  /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// This variant does not erase \p MI after calling the build function.
+  bool applyBuildFnNoErase(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
   bool matchRotateOutOfRange(MachineInstr &MI);
@@ -528,6 +534,11 @@ class CombinerHelper {
   bool matchBitfieldExtractFromAnd(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Reassociate pointer calculations with G_ADD involved, to allow better
+  /// addressing mode usage.
+  bool matchReassocPtrAdd(MachineInstr &MI,
+                          std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
@@ -589,6 +600,11 @@ class CombinerHelper {
       SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
       const SmallVector<Register, 8> &RegsToVisit,
       const unsigned MemSizeInBits);
+
+  /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
+  /// a re-association of its operands would break an existing legal addressing
+  /// mode that the address computation currently represents.
+  bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
 };
 } // namespace llvm
 

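The only functional difference between the two build-fn apply helpers is
whether the root instruction survives. A rough paraphrase of both (the NoErase
body is in the CombinerHelper.cpp hunk below; the eraseFromParent() call in
the default variant is implied by its doc comment):

    // Paraphrase, not verbatim source. Builder is CombinerHelper's
    // MachineIRBuilder member.
    bool CombinerHelper::applyBuildFn(
        MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
      Builder.setInstrAndDebugLoc(MI);
      MatchInfo(Builder);   // emit the replacement code
      MI.eraseFromParent(); // MI has been fully replaced
      return true;
    }

    bool CombinerHelper::applyBuildFnNoErase(
        MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
      Builder.setInstrAndDebugLoc(MI);
      MatchInfo(Builder);   // MatchInfo may rewrite MI's operands in place
      return true;          // so MI must stay
    }

matchReassocPtrAdd needs the NoErase variant because its MatchInfo lambdas
keep the original G_PTR_ADD and only swap its operands.
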
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a01d9b26f351c..678855bd4a11c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
 
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                             bitfield_extract_from_and]>;
+def reassoc_ptradd : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_PTR_ADD):$root,
+    [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                        mul_by_neg_one]>;
 
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-    extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+    extract_vec_elt_combines, combines_for_extload,
     combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+    reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,

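In C++ terms, the generated code for the new reassoc_ptradd rule behaves
roughly like this hand-written approximation (tryReassocPtrAdd is a
hypothetical name, not the symbol TableGen emits):

    #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    using namespace llvm;

    bool tryReassocPtrAdd(CombinerHelper &Helper, MachineInstr &MI) {
      if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) // wip_match_opcode root
        return false;
      std::function<void(MachineIRBuilder &)> MatchInfo; // build_fn_matchinfo
      if (!Helper.matchReassocPtrAdd(MI, MatchInfo))
        return false;
      return Helper.applyBuildFnNoErase(MI, MatchInfo);
    }

Placing reassocs ahead of ptr_add_immed_chain in all_combines lets the
immediate-chain fold run on the re-associated form rather than on the original
tree.
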
diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index bb02c101fd048..9a71500357863 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1720,6 +1720,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
   if (!MaybeImmVal)
     return false;
 
+  // Don't do this combine if there are multiple uses of the first PTR_ADD,
+  // since we may be able to compute the second PTR_ADD as an immediate
+  // offset anyway. Folding the first offset into the second may cause us
+  // to go beyond the bounds of our legal addressing modes.
+  if (!MRI.hasOneNonDBGUse(Add2))
+    return false;
+
   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
     return false;
@@ -3955,6 +3962,13 @@ bool CombinerHelper::applyBuildFn(
   return true;
 }
 
+bool CombinerHelper::applyBuildFnNoErase(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  MatchInfo(Builder);
+  return true;
+}
+
 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
@@ -4129,6 +4143,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   return true;
 }
 
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+    MachineInstr &PtrAdd) {
+  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+  Register Src1Reg = PtrAdd.getOperand(1).getReg();
+  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+  if (!Src1Def)
+    return false;
+
+  Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+  if (MRI.hasOneNonDBGUse(Src1Reg))
+    return false;
+
+  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+  if (!C1)
+    return false;
+  auto C2 = getConstantVRegVal(Src2Reg, MRI);
+  if (!C2)
+    return false;
+
+  const APInt &C1APIntVal = *C1;
+  const APInt &C2APIntVal = *C2;
+  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+    // This combine may end up running before ptrtoint/inttoptr combines
+    // manage to eliminate redundant conversions, so try to look through them.
+    MachineInstr *ConvUseMI = &UseMI;
+    unsigned ConvUseOpc = ConvUseMI->getOpcode();
+    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+      Register DefReg = ConvUseMI->getOperand(0).getReg();
+      if (!MRI.hasOneNonDBGUse(DefReg))
+        break;
+      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+      // Keep the opcode in sync with the instruction we stepped to, so the
+      // loop stops once we reach the converted pointer's real user.
+      ConvUseOpc = ConvUseMI->getOpcode();
+    }
+    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+                     ConvUseOpc == TargetOpcode::G_STORE;
+    if (!LoadStore)
+      continue;
+    // Is x[offset2] already not a legal addressing mode? If so then
+    // reassociating the constants breaks nothing (we test offset2 because
+    // that's the one we hope to fold into the load or store).
+    TargetLoweringBase::AddrMode AM;
+    AM.HasBaseReg = true;
+    AM.BaseOffs = C2APIntVal.getSExtValue();
+    unsigned AS =
+        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+    Type *AccessTy =
+        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+                      PtrAdd.getMF()->getFunction().getContext());
+    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      continue;
+
+    // Would x[offset1+offset2] still be a legal addressing mode?
+    AM.BaseOffs = CombinedValue;
+    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                   AccessTy, AS))
+      return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocPtrAdd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+  // We're trying to match a few pointer computation patterns here for
+  // re-association opportunities.
+  // 1) Isolating a constant operand to be on the RHS, e.g.:
+  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+  //
+  // 2) Folding the two constants, one from each sub-tree, as long as such
+  // folding doesn't break a legal addressing mode.
+  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+  Register Src1Reg = MI.getOperand(1).getReg();
+  Register Src2Reg = MI.getOperand(2).getReg();
+  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+    // Try to match example 1).
+    if (RHS->getOpcode() != TargetOpcode::G_ADD)
+      return false;
+    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+      auto NewBase =
+          B.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(NewBase.getReg(0));
+      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+      Observer.changedInstr(MI);
+    };
+  } else {
+    // Try to match example 2.
+    Register LHSSrc1 = LHS->getOperand(1).getReg();
+    Register LHSSrc2 = LHS->getOperand(2).getReg();
+    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+    if (!C1)
+      return false;
+    auto C2 = getConstantVRegVal(Src2Reg, MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(LHSSrc1);
+      MI.getOperand(2).setReg(NewCst.getReg(0));
+      Observer.changedInstr(MI);
+    };
+  }
+  return !reassociationCanBreakAddressingModePattern(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

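The heart of reassociationCanBreakAddressingModePattern is the pair of
isLegalAddressingMode queries. A condensed, standalone restatement of that
decision (sketch only; C1 and C2 follow the naming in the function above):

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/IR/DataLayout.h"
    using namespace llvm;

    // True if folding the offsets (base + C1 + C2) would turn a currently
    // legal addressing mode, base[C2], into an illegal one.
    static bool foldedOffsetBreaksAddrMode(const TargetLoweringBase &TLI,
                                           const DataLayout &DL,
                                           Type *AccessTy, unsigned AddrSpace,
                                           int64_t C1, int64_t C2) {
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;

      AM.BaseOffs = C2; // the offset the load/store can fold today
      if (!TLI.isLegalAddressingMode(DL, AM, AccessTy, AddrSpace))
        return false;   // already not foldable; folding cannot make it worse

      AM.BaseOffs = C1 + C2; // the offset left behind after re-association
      return !TLI.isLegalAddressingMode(DL, AM, AccessTy, AddrSpace);
    }

On AArch64 this is roughly why test4 in the new MIR test folds 17 + 4079 into
4096 (4079 was already not encodable for a 4-byte access, 4096 is), while
test1 keeps 4777 and 6 apart (6 is encodable, the combined 4783 is not).
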
diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
new file mode 100644
index 0000000000000..1c5ed0eb39d21
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+name:            test1_noreassoc_legal_already_new_is_illegal
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 4777
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test2_reassoc_already_legal_new_also_legal
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 10
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test3_noreassoc_only_oneuse
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %10:_(s64) = G_CONSTANT i64 4783
+    %9:_(p0) = G_PTR_ADD %0, %10(s64)
+    %7:_(s32) = G_LOAD %9(p0) :: (load 4)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test4_reassoc_existing_is_already_illegal
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 17
+    %4:_(s64) = G_CONSTANT i64 4079
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test5_add_on_rhs
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test5_add_on_rhs
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64) = G_ADD %1, %2
+    %4:_(p0) = G_PTR_ADD %0, %3(s64)
+    %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...

More information about the llvm-commits mailing list