[llvm] r292956 - [AMDGPU] Add VGPR copies post regalloc fix pass

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 24 09:46:17 PST 2017


Author: rampitec
Date: Tue Jan 24 11:46:17 2017
New Revision: 292956

URL: http://llvm.org/viewvc/llvm-project?rev=292956&view=rev
Log:
[AMDGPU] Add VGPR copies post regalloc fix pass

Regalloc creates COPY instructions which do not formally use VALU.
That results in v_mov instructions being displaced after exec mask
modification. One pass which does this is SIOptimizeExecMasking, but
potentially it can be done by other passes too.

This patch adds a pass immediately after regalloc to add an implicit exec
use operand to all VGPR copy instructions.

Differential Revision: https://reviews.llvm.org/D28874

Added:
    llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
    llvm/trunk/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=292956&r1=292955&r2=292956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Tue Jan 24 11:46:17 2017
@@ -60,6 +60,9 @@ extern char &SIShrinkInstructionsID;
 void initializeSIFixSGPRCopiesPass(PassRegistry &);
 extern char &SIFixSGPRCopiesID;
 
+void initializeSIFixVGPRCopiesPass(PassRegistry &);
+extern char &SIFixVGPRCopiesID;
+
 void initializeSILowerI1CopiesPass(PassRegistry &);
 extern char &SILowerI1CopiesID;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=292956&r1=292955&r2=292956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Jan 24 11:46:17 2017
@@ -86,6 +86,7 @@ extern "C" void LLVMInitializeAMDGPUTarg
   PassRegistry *PR = PassRegistry::getPassRegistry();
   initializeSILowerI1CopiesPass(*PR);
   initializeSIFixSGPRCopiesPass(*PR);
+  initializeSIFixVGPRCopiesPass(*PR);
   initializeSIFoldOperandsPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIFixControlFlowLiveIntervalsPass(*PR);
@@ -615,6 +616,7 @@ void GCNPassConfig::addOptimizedRegAlloc
 }
 
 void GCNPassConfig::addPostRegAlloc() {
+  addPass(&SIFixVGPRCopiesID); // Added ahead of SIOptimizeExecMasking so VGPR copies get their exec use first.
   addPass(&SIOptimizeExecMaskingID);
   TargetPassConfig::addPostRegAlloc();
 }

Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=292956&r1=292955&r2=292956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Tue Jan 24 11:46:17 2017
@@ -68,6 +68,7 @@ add_llvm_target(AMDGPUCodeGen
   SIDebuggerInsertNops.cpp
   SIFixControlFlowLiveIntervals.cpp
   SIFixSGPRCopies.cpp
+  SIFixVGPRCopies.cpp
   SIFoldOperands.cpp
   SIFrameLowering.cpp
   SIInsertSkips.cpp

Added: llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp?rev=292956&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp Tue Jan 24 11:46:17 2017
@@ -0,0 +1,72 @@
+//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Add implicit use of exec to vector register copies.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-vgpr-copies"
+
+namespace {
+
+class SIFixVGPRCopies : public MachineFunctionPass { // Adds an implicit exec use to VGPR COPY instructions.
+public:
+  static char ID; // Pass identity object; llvm::SIFixVGPRCopiesID is a reference to it.
+
+public:
+  SIFixVGPRCopies() : MachineFunctionPass(ID) {
+    initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override; // Returns true if any COPY was modified.
+
+  StringRef getPassName() const override { return "SI Fix VGPR copies"; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false)
+
+char SIFixVGPRCopies::ID = 0;
+
+char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; // Handle passed to addPass() in GCNPassConfig::addPostRegAlloc.
+
+bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { // Visits every instruction; returns true if any COPY was changed.
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      switch (MI.getOpcode()) {
+      case AMDGPU::COPY: // Only generic COPY instructions are candidates.
+        if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { // Skip copies that already read exec.
+          MI.addOperand(MF,
+                        MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); // isDef=false, isImp=true: an implicit use.
+          DEBUG(dbgs() << "Add exec use to " << MI);
+          Changed = true;
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  return Changed;
+}

Added: llvm/trunk/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fix-vgpr-copies.mir?rev=292956&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fix-vgpr-copies.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fix-vgpr-copies.mir Tue Jan 24 11:46:17 2017
@@ -0,0 +1,44 @@
+# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
+# Check that we first do all vector instructions and only then change exec
+# CHECK-DAG:  COPY %vgpr10_vgpr11
+# CHECK-DAG:  COPY %vgpr12_vgpr13
+# CHECK:      %exec = COPY
+
+---
+name:            main
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%sgpr4_sgpr5' }
+  - { reg: '%sgpr6' }
+  - { reg: '%vgpr0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %vgpr3, %vgpr10_vgpr11, %vgpr12_vgpr13
+
+    %vcc = V_CMP_NE_U32_e64 0, killed %vgpr3, implicit %exec
+    %sgpr4_sgpr5 = COPY %exec, implicit-def %exec
+    %sgpr6_sgpr7 = S_AND_B64 %sgpr4_sgpr5, killed %vcc, implicit-def dead %scc
+    %sgpr4_sgpr5 = S_XOR_B64 %sgpr6_sgpr7, killed %sgpr4_sgpr5, implicit-def dead %scc
+    %vgpr61_vgpr62 = COPY %vgpr10_vgpr11
+    %vgpr155_vgpr156 = COPY %vgpr12_vgpr13
+    %exec = S_MOV_B64_term killed %sgpr6_sgpr7
+...




More information about the llvm-commits mailing list