[llvm] r191790 - R600: add a pass that merges clauses.

Vincent Lejeune vljn at ovi.com
Tue Oct 1 12:32:59 PDT 2013


Author: vljn
Date: Tue Oct  1 14:32:58 2013
New Revision: 191790

URL: http://llvm.org/viewvc/llvm-project?rev=191790&view=rev
Log:
R600: add a pass that merges clauses.

Added:
    llvm/trunk/lib/Target/R600/R600ClauseMergePass.cpp
Modified:
    llvm/trunk/lib/Target/R600/AMDGPU.h
    llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/R600/CMakeLists.txt
    llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
    llvm/trunk/lib/Target/R600/R600InstrInfo.h
    llvm/trunk/lib/Target/R600/R600Instructions.td
    llvm/trunk/test/CodeGen/R600/jump-address.ll
    llvm/trunk/test/CodeGen/R600/loop-address.ll
    llvm/trunk/test/CodeGen/R600/selectcc-opt.ll

Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPU.h Tue Oct  1 14:32:58 2013
@@ -29,6 +29,7 @@ FunctionPass *createR600VectorRegMerger(
 FunctionPass *createR600TextureIntrinsicsReplacer();
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
 FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
 FunctionPass *createR600Packetizer(TargetMachine &tm);
 FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
 FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);

Modified: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp Tue Oct  1 14:32:58 2013
@@ -168,10 +168,11 @@ bool AMDGPUPassConfig::addPostRegAlloc()
 bool AMDGPUPassConfig::addPreSched2() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
     addPass(createR600EmitClauseMarkers(*TM));
-  }
   addPass(&IfConverterID);
+  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+    addPass(createR600ClauseMergePass(*TM));
   return false;
 }
 

Modified: llvm/trunk/lib/Target/R600/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/CMakeLists.txt?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/R600/CMakeLists.txt Tue Oct  1 14:32:58 2013
@@ -28,6 +28,7 @@ add_llvm_target(R600CodeGen
   AMDGPUConvertToISA.cpp
   AMDGPUInstrInfo.cpp
   AMDGPURegisterInfo.cpp
+  R600ClauseMergePass.cpp
   R600ControlFlowFinalizer.cpp
   R600EmitClauseMarkers.cpp
   R600ExpandSpecialInstrs.cpp

Added: llvm/trunk/lib/Target/R600/R600ClauseMergePass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ClauseMergePass.cpp?rev=191790&view=auto
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ClauseMergePass.cpp (added)
+++ llvm/trunk/lib/Target/R600/R600ClauseMergePass.cpp Tue Oct  1 14:32:58 2013
@@ -0,0 +1,204 @@
+//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The R600EmitClauseMarkers pass emits CFAlu instructions conservatively.
+/// This pass merges consecutive CFAlus where applicable.
+/// It needs to be called after IfCvt for best results.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600mergeclause"
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static bool isCFAlu(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case AMDGPU::CF_ALU:
+  case AMDGPU::CF_ALU_PUSH_BEFORE:
+    return true;
+  default:
+    return false;
+  }
+}
+
+class R600ClauseMergePass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+
+  unsigned getCFAluSize(const MachineInstr *MI) const;
+  bool isCFAluEnabled(const MachineInstr *MI) const;
+
+  /// The IfCvt pass can generate "disabled" ALU clause markers that need to be
+  /// removed, with their content assigned to the previous ALU clause.
+  /// This function scans the instructions after CFAlu and merges each disabled
+  /// CFAlu into it, stopping at the first enabled CFAlu or the block's end.
+  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
+
+  /// Merge LatrCFAlu into RootCFAlu when the two markers are compatible.
+  /// \returns true if the merge was performed.
+  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
+      const;
+
+public:
+  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const;
+};
+
+char R600ClauseMergePass::ID = 0;
+
+unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
+  assert(isCFAlu(MI));
+  return MI->getOperand(
+      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
+}
+
+bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
+  assert(isCFAlu(MI));
+  return MI->getOperand(
+      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
+}
+
+void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
+    const {
+  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+  MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
+  I++;
+  do {
+    while (I!= E && !isCFAlu(I))
+      I++;
+    if (I == E)
+      return;
+    MachineInstr *MI = I++;
+    if (isCFAluEnabled(MI))
+      break;
+    CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
+    MI->eraseFromParent();
+  } while (I != E);
+}
+
+bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
+                                          const MachineInstr *LatrCFAlu) const {
+  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
+  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+  unsigned RootInstCount = getCFAluSize(RootCFAlu),
+      LaterInstCount = getCFAluSize(LatrCFAlu);
+  unsigned CumuledInsts = RootInstCount + LaterInstCount;
+  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
+    DEBUG(dbgs() << "Excess inst counts\n");
+    return false;
+  }
+  if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+    return false;
+  // KCache set 0: when both modes are non-zero, bank and line must match.
+  int Mode0Idx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+  int KBank0Idx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+  int KBank0LineIdx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+  if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
+      RootCFAlu->getOperand(Mode0Idx).getImm() &&
+      (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
+       RootCFAlu->getOperand(KBank0Idx).getImm() ||
+      LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
+      RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
+    DEBUG(dbgs() << "Wrong KC0\n");
+    return false;
+  }
+  // KCache set 1: when both modes are non-zero, bank and line must match.
+  int Mode1Idx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+  int KBank1Idx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+  int KBank1LineIdx =
+      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+  if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
+      RootCFAlu->getOperand(Mode1Idx).getImm() &&
+      (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
+      RootCFAlu->getOperand(KBank1Idx).getImm() ||
+      LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
+      RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
+    DEBUG(dbgs() << "Wrong KC1\n");
+    return false;
+  }
+  if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
+    RootCFAlu->getOperand(Mode0Idx).setImm(
+        LatrCFAlu->getOperand(Mode0Idx).getImm());
+    RootCFAlu->getOperand(KBank0Idx).setImm(
+        LatrCFAlu->getOperand(KBank0Idx).getImm());
+    RootCFAlu->getOperand(KBank0LineIdx).setImm(
+        LatrCFAlu->getOperand(KBank0LineIdx).getImm());
+  }
+  if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
+    RootCFAlu->getOperand(Mode1Idx).setImm(
+        LatrCFAlu->getOperand(Mode1Idx).getImm());
+    RootCFAlu->getOperand(KBank1Idx).setImm(
+        LatrCFAlu->getOperand(KBank1Idx).getImm());
+    RootCFAlu->getOperand(KBank1LineIdx).setImm(
+        LatrCFAlu->getOperand(KBank1LineIdx).getImm());
+  }
+  RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
+  RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
+  return true;
+}
+
+bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
+    MachineBasicBlock::iterator LatestCFAlu = E;
+    while (I != E) {
+      MachineInstr *MI = I++;
+      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
+          TII->mustBeLastInClause(MI->getOpcode()))
+        LatestCFAlu = E;
+      if (!isCFAlu(MI))
+        continue;
+      cleanPotentialDisabledCFAlu(MI);
+      // Merge MI into the clause opened by LatestCFAlu, or start a new one.
+      if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
+        MI->eraseFromParent();
+      } else {
+        assert(isCFAluEnabled(MI) && "CF ALU instruction disabled");
+        LatestCFAlu = MI;
+      }
+    }
+  }
+  return false;
+}
+
+const char *R600ClauseMergePass::getPassName() const {
+  return "R600 Merge Clause Markers Pass";
+}
+
+} // end anonymous namespace
+
+
+llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
+  return new R600ClauseMergePass(TM);
+}

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Tue Oct  1 14:32:58 2013
@@ -153,6 +153,24 @@ bool R600InstrInfo::isLDSInstr(unsigned
           (TargetFlags & R600_InstFlag::LDS_1A2D));
 }
 
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+  if (isALUInstr(MI->getOpcode()))
+    return true;
+  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+    return true;
+  switch (MI->getOpcode()) {
+  case AMDGPU::PRED_X:
+  case AMDGPU::INTERP_PAIR_XY:
+  case AMDGPU::INTERP_PAIR_ZW:
+  case AMDGPU::INTERP_VEC_LOAD:
+  case AMDGPU::COPY:
+  case AMDGPU::DOT_4:
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
   if (ST.hasCaymanISA())
     return false;

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Tue Oct  1 14:32:58 2013
@@ -66,6 +66,10 @@ namespace llvm {
   bool hasInstrModifiers(unsigned Opcode) const;
   bool isLDSInstr(unsigned Opcode) const;
 
+  /// \returns true if \p MI is an ALU instruction or an instruction that
+  /// will be lowered by the ExpandSpecialInstrs pass.
+  bool canBeConsideredALU(const MachineInstr *MI) const;
+
   bool isTransOnly(unsigned Opcode) const;
   bool isTransOnly(const MachineInstr *MI) const;
   bool isVectorOnly(unsigned Opcode) const;

Modified: llvm/trunk/lib/Target/R600/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Tue Oct  1 14:32:58 2013
@@ -590,6 +590,7 @@ i32imm:$COUNT, i32imm:$Enabled),
   let ALT_CONST = 0;
   let WHOLE_QUAD_MODE = 0;
   let BARRIER = 1;
+  let UseNamedOperandTable = 1;
 
   let Inst{31-0} = Word0;
   let Inst{63-32} = Word1;

Modified: llvm/trunk/test/CodeGen/R600/jump-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/jump-address.ll?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/jump-address.ll (original)
+++ llvm/trunk/test/CodeGen/R600/jump-address.ll Tue Oct  1 14:32:58 2013
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: JUMP @10
+; CHECK: JUMP @5
 ; CHECK: EXPORT
 ; CHECK-NOT: EXPORT
 

Modified: llvm/trunk/test/CodeGen/R600/loop-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/loop-address.ll?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/loop-address.ll (original)
+++ llvm/trunk/test/CodeGen/R600/loop-address.ll Tue Oct  1 14:32:58 2013
@@ -1,9 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: ALU_PUSH
-;CHECK: LOOP_START_DX10 @13
-;CHECK: LOOP_BREAK @12
-;CHECK: POP @12
+;CHECK: LOOP_START_DX10 @11
+;CHECK: LOOP_BREAK @10
+;CHECK: POP @10
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
 target triple = "r600--"

Modified: llvm/trunk/test/CodeGen/R600/selectcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc-opt.ll?rev=191790&r1=191789&r2=191790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc-opt.ll Tue Oct  1 14:32:58 2013
@@ -30,7 +30,8 @@ ENDIF:
 
 ; CHECK: @test_b
 ; CHECK: SET{{[GTEQN]+}}_DX10
-; CHECK: PRED_
+; CHECK-NEXT: PRED_
+; CHECK-NEXT: ALU clause starting
 define void @test_b(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 0.0





More information about the llvm-commits mailing list