[llvm] 974cf71 - [AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 1 16:09:28 PDT 2022


Author: Amara Emerson
Date: 2022-11-01T16:09:21-07:00
New Revision: 974cf71649150c3da9b7590a2ade2a49bc197f4c

URL: https://github.com/llvm/llvm-project/commit/974cf71649150c3da9b7590a2ade2a49bc197f4c
DIFF: https://github.com/llvm/llvm-project/commit/974cf71649150c3da9b7590a2ade2a49bc197f4c.diff

LOG: [AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.

This does some trivial cross-regclass folding, where we can either do some extra
constraining to eliminate the copy or modify uses to use a smaller regclass.

There are minor code size improvements on average.

Program                                       size.__text
                                              before         after           diff
tramp3d-v4/tramp3d-v4                         366000.00      366012.00       0.0%
mafft/pairlocalalign                          248196.00      248188.00      -0.0%
7zip/7zip-benchmark                           568612.00      568592.00      -0.0%
kimwitu++/kc                                  434704.00      434676.00      -0.0%
Bullet/bullet                                 456128.00      456096.00      -0.0%
sqlite3/sqlite3                               284136.00      284100.00      -0.0%
ClamAV/clamscan                               381492.00      381396.00      -0.0%
SPASS/SPASS                                   412052.00      411944.00      -0.0%
lencod/lencod                                 428060.00      427912.00      -0.0%
consumer-typeset/consumer-typeset             413148.00      411116.00      -0.5%
                           Geomean difference                               -0.1%

Differential Revision: https://reviews.llvm.org/D136793

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index ce6f15a799b72..670a16209705c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -14,12 +14,15 @@
 #include "AArch64.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "aarch64-post-select-optimize"
 
@@ -42,6 +45,9 @@ class AArch64PostSelectOptimize : public MachineFunctionPass {
 
 private:
   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+  bool doPeepholeOpts(MachineBasicBlock &MBB);
+  /// Look for cross regclass copies that can be trivially eliminated.
+  bool foldSimpleCrossClassCopies(MachineInstr &MI);
 };
 } // end anonymous namespace
 
@@ -74,6 +80,62 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
   }
 }
 
+/// Run the per-instruction peephole folds over every instruction in \p MBB.
+/// \returns true if at least one fold reported a change.
+bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
+  bool MadeChange = false;
+  // Early-increment iteration: a successful fold erases the instruction it was
+  // handed, so the iterator has to step past it before the fold runs.
+  for (MachineInstr &Inst : make_early_inc_range(MBB)) {
+    if (foldSimpleCrossClassCopies(Inst))
+      MadeChange = true;
+  }
+  return MadeChange;
+}
+
+/// Try to delete a full-register COPY between two virtual registers whose
+/// register classes differ but where one class contains the other.
+///
+/// On success every reference to the copy's destination is rewritten to the
+/// source register and \p MI is erased.
+/// \returns true if \p MI was folded away, false if it was left untouched.
+bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
+  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+  if (!MI.isCopy())
+    return false;
+
+  const MachineOperand &DstOp = MI.getOperand(0);
+  const MachineOperand &SrcOp = MI.getOperand(1);
+
+  // Copies that read a sub-register of the source are not handled.
+  if (SrcOp.getSubReg())
+    return false;
+
+  const Register Src = SrcOp.getReg();
+  const Register Dst = DstOp.getReg();
+  if (Dst.isPhysical() || Src.isPhysical())
+    return false;
+
+  const TargetRegisterClass *const SrcRC = MRI.getRegClass(Src);
+  const TargetRegisterClass *const DstRC = MRI.getRegClass(Dst);
+  if (SrcRC == DstRC)
+    return false;
+
+  // Only nested classes are handled; unrelated classes keep their copy.
+  const bool SrcIsSuperClass = SrcRC->hasSubClass(DstRC);
+  if (!SrcIsSuperClass && !DstRC->hasSubClass(SrcRC))
+    return false;
+
+  if (SrcIsSuperClass) {
+    // The source class is the larger one. Narrowing it affects every user of
+    // the source, so only do it when this copy is the sole non-debug user.
+    if (!MRI.hasOneNonDBGUse(Src))
+      return false;
+
+    // Narrow the source to the destination class, but refuse if that would
+    // leave a class with only a handful of registers.
+    constexpr unsigned MinNumRegs = 25;
+    if (!MRI.constrainRegClass(Src, DstRC, MinNumRegs))
+      return false;
+  }
+  // Otherwise the destination class is the larger one. No register class is
+  // changed in that case: the source keeps its smaller class and simply takes
+  // over all references to the destination below.
+
+  MRI.replaceRegWith(Dst, Src);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   // Consider the following code:
   //  FCMPSrr %0, %1, implicit-def $nzcv
@@ -178,8 +240,10 @@ bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
          "Expected a selected MF");
 
   bool Changed = false;
-  for (auto &BB : MF)
+  for (auto &BB : MF) {
     Changed |= optimizeNZCVDefs(BB);
+    Changed |= doPeepholeOpts(BB);
+  }
   return Changed;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
new file mode 100644
index 0000000000000..ea9c3881c6c3b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+  @x = external hidden local_unnamed_addr global i32*, align 8
+  define void @copy_from_larger_rc_def() { ret void }
+  define void @copy_from_larger_rc_def_multi_use() { ret void }
+  define void @copy_from_smaller_rc_def() { ret void }
+
+...
+---
+name:            copy_from_larger_rc_def
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; Show that if we're doing a copy from a large rc to a single user with a smaller rc
+    ; then we just constrain the def instead.
+    ; CHECK-LABEL: name: copy_from_larger_rc_def
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri [[COPY]], 61, 60
+    ; CHECK-NEXT: %add:gpr64sp = ADDXri %large_rc_def, 3, 0
+    ; CHECK-NEXT: $x0 = COPY %add
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64 = COPY $x0
+    %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+    %constrain_copy:gpr64common = COPY %large_rc_def
+    ; Even though ADDXri may not actually need to use gpr64common, just use it as an example.
+    %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    $x0 = COPY %add
+    RET_ReallyLR
+
+...
+---
+name:            copy_from_larger_rc_def_multi_use
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; Don't constrain def if the original def has multiple users.
+    ; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri [[COPY]], 61, 60
+    ; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def
+    ; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    ; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+    ; CHECK-NEXT: $x0 = COPY %add
+    ; CHECK-NEXT: $x1 = COPY %large_rc_def
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64 = COPY $x0
+    %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+    %constrain_copy:gpr64common = COPY %large_rc_def
+    %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+    $x0 = COPY %add
+    $x1 = COPY %large_rc_def
+    RET_ReallyLR
+
+...
+---
+name:            copy_from_smaller_rc_def
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; Show that if we're doing a copy from a small rc to a single user with a larger rc
+    ; then we just use the smaller def instead of doing a copy.
+    ; CHECK-LABEL: name: copy_from_smaller_rc_def
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0
+    ; CHECK-NEXT: STRXui [[COPY1]], %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64common = COPY $x0
+    %1:gpr64common = COPY $x1
+
+    %add:gpr64common = ADDXri %1, 3, 0
+    %copy:gpr64sp = COPY %add
+    STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list