[llvm] 65f99b5 - [AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs just before selection.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 14:10:45 PST 2020


Author: Amara Emerson
Date: 2020-02-26T14:10:32-08:00
New Revision: 65f99b5383ff3293881f59dd64cfb596c3d03aa4

URL: https://github.com/llvm/llvm-project/commit/65f99b5383ff3293881f59dd64cfb596c3d03aa4
DIFF: https://github.com/llvm/llvm-project/commit/65f99b5383ff3293881f59dd64cfb596c3d03aa4.diff

LOG: [AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs just before selection.

Since all types <32b on gpr end up being assigned gpr32 regclasses, we can end
up with PHIs here which try to select between a gpr32 and an fpr16. Ideally RBS
shouldn't be selecting heterogeneous regbanks for operands if possible, but we
still need to be able to deal with it here.

To fix this, if we have a gpr-bank operand < 32b in size and at least one other
operand is on the fpr bank, then we add cross-bank copies to homogenize the
operand banks. For simplicity the bank that we choose to settle on is whatever
bank the def operand has. For example:

%endbb:
  %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
 =>
%bb2:
  ...
  %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
  ...
%endbb:
  %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2

Differential Revision: https://reviews.llvm.org/D75086

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 25aba784b197..2ec12effb8f6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -64,6 +64,8 @@ class AArch64InstructionSelector : public InstructionSelector {
     ProduceNonFlagSettingCondBr =
         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
     MFReturnAddr = Register();
+
+    processPHIs(MF);
   }
 
 private:
@@ -78,6 +80,9 @@ class AArch64InstructionSelector : public InstructionSelector {
   // An early selection function that runs before the selectImpl() call.
   bool earlySelect(MachineInstr &I) const;
 
+  // Do some preprocessing of G_PHIs before we begin selection.
+  void processPHIs(MachineFunction &MF);
+
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -5327,6 +5332,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
   }
 }
 
+
+// Homogenize the register banks of the given G_PHI: every incoming value that
+// is not on the destination's bank is replaced by a cross-bank copy.
+static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
+                            const AArch64RegisterBankInfo &RBI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
+  Register DstReg = MI.getOperand(0).getReg();
+  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
+  assert(DstRB && "Expected PHI dst to have regbank assigned");
+  MachineIRBuilder MIB(MI);
+
+  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+    MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg() || MRI.getRegBankOrNull(MO.getReg()) == DstRB)
+      continue;
+    Register OpReg = MO.getReg();
+    MachineInstr *OpDef = MRI.getVRegDef(OpReg);
+    MachineBasicBlock &OpDefBB = *OpDef->getParent();
+    // The copy goes right after the value's def. If that def is itself a PHI
+    // followed by more PHIs, place it after the whole PHI group instead.
+    MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
+    if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
+      InsertPt = OpDefBB.getFirstNonPHI();
+    MIB.setInsertPt(OpDefBB, InsertPt);
+    auto Copy = MIB.buildCopy(MRI.getType(OpReg), OpReg);
+    MRI.setRegBank(Copy.getReg(0), *DstRB);
+    MO.setReg(Copy.getReg(0));
+  }
+}
+
+void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
+  // Collect the G_PHIs up front: the fixup below inserts instructions, so we
+  // must not be iterating over the blocks while it runs.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SmallVector<MachineInstr *, 32> Phis;
+  for (auto &BB : MF) {
+    for (auto &MI : BB) {
+      if (MI.getOpcode() == TargetOpcode::G_PHI)
+        Phis.emplace_back(&MI);
+    }
+  }
+
+  for (auto *MI : Phis) {
+    // We need to do some work here if the operand types are < 32 bit and they
+    // are split across fpr/gpr banks. Since all types <32b on gpr
+    // end up being assigned gpr32 regclasses, we can end up with PHIs here
+    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
+    // be selecting heterogeneous regbanks for operands if possible, but we
+    // still need to be able to deal with it here.
+    //
+    // To fix this, if we have a gpr-bank operand < 32b in size and at least
+    // one other operand is on the fpr bank, then we add cross-bank copies
+    // to homogenize the operand banks. For simplicity the bank that we choose
+    // to settle on is whatever bank the def operand has. For example:
+    //
+    // %endbb:
+    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
+    //  =>
+    // %bb2:
+    //   ...
+    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
+    //   ...
+    // %endbb:
+    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
+    bool HasGPROp = false, HasFPROp = false;
+    for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
+      const auto &MO = MI->getOperand(OpIdx);
+      if (!MO.isReg())
+        continue;
+      const LLT &Ty = MRI.getType(MO.getReg());
+      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+      // Only small scalars that already have a bank are handled. Anything
+      // else means the whole PHI is left alone, so forget any banks seen on
+      // earlier operands rather than fixing up a partially analysed PHI.
+      if (!Ty.isValid() || !Ty.isScalar() || Ty.getSizeInBits() >= 32 || !RB) {
+        HasGPROp = HasFPROp = false;
+        break;
+      }
+      if (RB->getID() == AArch64::GPRRegBankID)
+        HasGPROp = true;
+      else
+        HasFPROp = true;
+    }
+    // We have heterogeneous regbanks, need to fixup.
+    if (HasGPROp && HasFPROp)
+      fixupPHIOpBanks(*MI, MRI, RBI);
+  }
+}
+
 namespace llvm {
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &TM,

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
new file mode 100644
index 000000000000..1b7c07401899
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -0,0 +1,110 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
+---
+name:            test_loop_phi_fpr_to_gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+liveins:         []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
+  ; CHECK:   [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+  ; CHECK:   B %bb.2
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:gpr(s1) = G_IMPLICIT_DEF
+    %4:gpr(p0) = G_IMPLICIT_DEF
+    %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %6:gpr(s32) = G_IMPLICIT_DEF
+    %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+    %1:gpr(s16) = G_TRUNC %7(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000)
+
+    %3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
+    %5:fpr(s16) = G_FPTRUNC %8(s32)
+    G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+    G_BR %bb.2
+
+...
+---
+name:            test_loop_phi_gpr_to_fpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+liveins:         []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
+  ; CHECK:   [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+  ; CHECK:   STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+  ; CHECK:   B %bb.2
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:gpr(s1) = G_IMPLICIT_DEF
+    %4:gpr(p0) = G_IMPLICIT_DEF
+    %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %6:gpr(s32) = G_IMPLICIT_DEF
+    %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+    %1:gpr(s16) = G_TRUNC %7(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000)
+
+    %3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
+    %5:fpr(s16) = G_FPTRUNC %8(s32)
+    G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+    G_BR %bb.2
+
+...


        


More information about the llvm-commits mailing list