[llvm] r266626 - [AMDGPU] Add insert nops pass based on subtarget features instead of cl::opt

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 18 09:28:23 PDT 2016


Author: kzhuravl
Date: Mon Apr 18 11:28:23 2016
New Revision: 266626

URL: http://llvm.org/viewvc/llvm-project?rev=266626&view=rev
Log:
[AMDGPU] Add insert nops pass based on subtarget features instead of cl::opt
Also,
- Skip pass if machine module does not have debug info
- Minor comment changes
- Added test

Differential Revision: http://reviews.llvm.org/D19079

Added:
    llvm/trunk/test/CodeGen/AMDGPU/debugger_insert_nops.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=266626&r1=266625&r2=266626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Apr 18 11:28:23 2016
@@ -318,6 +318,17 @@ def FeatureVolcanicIslands : SubtargetFe
 >;
 
 //===----------------------------------------------------------------------===//
+// Debugger related subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureDebuggerInsertNops : SubtargetFeature<
+  "amdgpu-debugger-insert-nops",
+  "DebuggerInsertNops",
+  "true",
+  "Insert two nop instructions for each high level source statement"
+>;
+
+//===----------------------------------------------------------------------===//
 
 def AMDGPUInstrInfo : InstrInfo {
   let guessInstructionProperties = 1;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=266626&r1=266625&r2=266626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Apr 18 11:28:23 2016
@@ -97,7 +97,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
       HasSMemRealTime(false), Has16BitInsts(false),
       LDSBankCount(0),
       IsaVersion(ISAVersion0_0_0),
-      EnableSIScheduler(false), FrameLowering(nullptr),
+      EnableSIScheduler(false),
+      DebuggerInsertNops(false),
+      FrameLowering(nullptr),
       GISel(),
       InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=266626&r1=266625&r2=266626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Apr 18 11:28:23 2016
@@ -95,6 +95,7 @@ private:
   int LDSBankCount;
   unsigned IsaVersion;
   bool EnableSIScheduler;
+  bool DebuggerInsertNops;
 
   std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
   std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -304,6 +305,10 @@ public:
     return EnableSIScheduler;
   }
 
+  bool debuggerInsertNops() const {
+    return DebuggerInsertNops;
+  }
+
   bool dumpCode() const {
     return DumpCode;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=266626&r1=266625&r2=266626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Apr 18 11:28:23 2016
@@ -31,7 +31,6 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -149,11 +148,6 @@ GCNTargetMachine::GCNTargetMachine(const
 
 namespace {
 
-cl::opt<bool> InsertNops(
-  "amdgpu-insert-nops",
-  cl::desc("Insert two nop instructions for each high level source statement"),
-  cl::init(false));
-
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
@@ -397,7 +391,9 @@ void GCNPassConfig::addPreSched2() {
 void GCNPassConfig::addPreEmitPass() {
   addPass(createSIInsertWaitsPass(), false);
   addPass(createSILowerControlFlowPass(), false);
-  if (InsertNops) {
+
+  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+  if (ST.debuggerInsertNops()) {
     addPass(createSIInsertNopsPass(), false);
   }
 }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp?rev=266626&r1=266625&r2=266626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertNopsPass.cpp Mon Apr 18 11:28:23 2016
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// \brief Insert two S_NOP instructions for every high level source statement.
+/// \brief Insert two nop instructions for each high level source statement.
 ///
 /// Tools, such as debugger, need to pause execution based on user input (i.e.
-/// breakpoint). In order to do this, two S_NOP instructions are inserted for
-/// each high level source statement: one before first isa instruction of high
-/// level source statement, and one after last isa instruction of high level
-/// source statement. Further, debugger may replace S_NOP instructions with
-/// S_TRAP instructions based on user input.
+/// breakpoint). In order to do this, two nop instructions are inserted for each
+/// high level source statement: one before the first isa instruction of the
+/// statement, and one after its last isa instruction. Further, the debugger
+/// may replace these nop instructions with trap instructions based on user
+/// input.
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "si-insert-nops"
@@ -53,10 +54,21 @@ FunctionPass *llvm::createSIInsertNopsPa
 }
 
 bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
+  // Skip machine functions without debug info.
+  if (!MF.getMMI().hasDebugInfo()) {
+    return false;
+  }
+
+  // Target instruction info.
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
 
+  // Mapping from high level source statement line number to last corresponding
+  // isa instruction.
   DenseMap<unsigned, MachineBasicBlock::iterator> LineToInst;
+  // Insert nop instruction before first isa instruction of each high level
+  // source statement and collect last isa instruction for each high level
+  // source statement.
   for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) {
     for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) {
       if (MI->isDebugValue() || !MI->getDebugLoc()) {
@@ -74,6 +86,8 @@ bool SIInsertNops::runOnMachineFunction(
       }
     }
   }
+  // Insert nop instruction after last isa instruction of each high level source
+  // statement.
   for (auto LineToInstEntry = LineToInst.begin();
          LineToInstEntry != LineToInst.end(); ++LineToInstEntry) {
     auto MBB = LineToInstEntry->second->getParent();
@@ -85,6 +99,7 @@ bool SIInsertNops::runOnMachineFunction(
         .addImm(0);
     }
   }
+  // Insert nop instruction before prologue.
   MachineBasicBlock &MBB = MF.front();
   MachineInstr &MI = MBB.front();
   BuildMI(MBB, MI, DebugLoc(), TII->get(AMDGPU::S_NOP))

Added: llvm/trunk/test/CodeGen/AMDGPU/debugger_insert_nops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debugger_insert_nops.ll?rev=266626&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debugger_insert_nops.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/debugger_insert_nops.ll Mon Apr 18 11:28:23 2016
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK: debugger_insert_nops.cl:2:3
+; CHECK-NEXT: s_nop 0
+; CHECK: debugger_insert_nops.cl:3:3
+; CHECK-NEXT: s_nop 0
+; CHECK: debugger_insert_nops.cl:4:3
+; CHECK-NEXT: s_nop 0
+; CHECK: debugger_insert_nops.cl:5:3
+; CHECK-NEXT: s_nop 0
+; CHECK: debugger_insert_nops.cl:6:1
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_endpgm
+
+; Function Attrs: nounwind
+define void @debugger_insert_nops(i32 addrspace(1)* %A) #0 !dbg !12 {
+entry:
+  %A.addr = alloca i32 addrspace(1)*, align 4
+  store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+  call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
+  %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
+  store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
+  %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
+  store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
+  %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !24
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
+  store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
+  %3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !26
+  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4, !dbg !26
+  store i32 4, i32 addrspace(1)* %arrayidx3, align 4, !dbg !27
+  ret void, !dbg !28
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!opencl.kernels = !{!3}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 266620)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "debugger_insert_nops.cl", directory: "/home/kzhuravl/Sandbox")
+!2 = !{}
+!3 = !{void (i32 addrspace(1)*)* @debugger_insert_nops, !4, !5, !6, !7, !8}
+!4 = !{!"kernel_arg_addr_space", i32 1}
+!5 = !{!"kernel_arg_access_qual", !"none"}
+!6 = !{!"kernel_arg_type", !"int*"}
+!7 = !{!"kernel_arg_base_type", !"int*"}
+!8 = !{!"kernel_arg_type_qual", !""}
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0 (trunk 266620)"}
+!12 = distinct !DISubprogram(name: "debugger_insert_nops", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !15}
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
+!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
+!18 = !DIExpression()
+!19 = !DILocation(line: 1, column: 46, scope: !12)
+!20 = !DILocation(line: 2, column: 3, scope: !12)
+!21 = !DILocation(line: 2, column: 8, scope: !12)
+!22 = !DILocation(line: 3, column: 3, scope: !12)
+!23 = !DILocation(line: 3, column: 8, scope: !12)
+!24 = !DILocation(line: 4, column: 3, scope: !12)
+!25 = !DILocation(line: 4, column: 8, scope: !12)
+!26 = !DILocation(line: 5, column: 3, scope: !12)
+!27 = !DILocation(line: 5, column: 8, scope: !12)
+!28 = !DILocation(line: 6, column: 1, scope: !12)




More information about the llvm-commits mailing list