[PATCH v2 2/2] R600/SI: Optimize SI_KILL for constant operands

Michel Dänzer michel at daenzer.net
Tue Feb 25 02:36:47 PST 2014


From: Michel Dänzer <michel.daenzer at amd.com>

If the SI_KILL operand is a constant, its sign is known at compile time:
clear the exec mask with a single S_MOV_B64 if the operand is negative,
and emit nothing otherwise, instead of always emitting V_CMPX_LE_F32.

Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---

v2:
* Use just 'SI' as the FileCheck prefix in the lit test.

 lib/Target/R600/SIInstructions.td      |  4 ++--
 lib/Target/R600/SILowerControlFlow.cpp | 25 ++++++++++++++++++-------
 test/CodeGen/R600/llvm.AMDGPU.kill.ll  | 10 +++++++---
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index b45da5c..b501645 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1333,7 +1333,7 @@ def SI_END_CF : InstSI <
 
 def SI_KILL : InstSI <
   (outs),
-  (ins VReg_32:$src),
+  (ins SSrc_32:$src),
   "SI_KILL $src",
   [(int_AMDGPU_kill f32:$src)]
 >;
@@ -1425,7 +1425,7 @@ def : Pat<
 
 def : Pat <
   (int_AMDGPU_kilp),
-  (SI_KILL (V_MOV_B32_e32 0xbf800000))
+  (SI_KILL 0xbf800000)
 >;
 
 /* int_SI_vs_load_input */
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index fa5ee16..5eda3e9 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -55,6 +55,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
 
 using namespace llvm;
 
@@ -82,7 +83,7 @@ private:
   void Loop(MachineInstr &MI);
   void EndCf(MachineInstr &MI);
 
-  void Kill(MachineInstr &MI);
+  void Kill(MachineInstr &MI, unsigned Depth);
   void Branch(MachineInstr &MI);
 
   void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
@@ -291,9 +292,10 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
   // If these aren't equal, this is probably an infinite loop.
 }
 
-void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+void SILowerControlFlowPass::Kill(MachineInstr &MI, unsigned Depth) {
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc DL = MI.getDebugLoc();
+  const MachineOperand &Op = MI.getOperand(0);
 
   // Kill is only allowed in pixel / geometry shaders
   assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
@@ -301,10 +303,19 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
          MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
          ShaderType::GEOMETRY);
 
-  // Clear this pixel from the exec mask if the operand is negative
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
-          .addImm(0)
-          .addOperand(MI.getOperand(0));
+  // Clear this thread from the exec mask if the operand is negative
+  if ((Op.isImm() || Op.isFPImm())) {
+    // Constant operand: Set exec mask to 0 or do nothing
+    if (Op.isImm() ? (Op.getImm() & 0x80000000) :
+        Op.getFPImm()->isNegative()) {
+      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+              .addImm(0);
+    }
+  } else {
+    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+           .addImm(0)
+           .addOperand(MI.getOperand(0));
+  }
 
   MI.eraseFromParent();
 }
@@ -478,7 +489,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
             SkipIfDead(MI);
           else
             HaveKill = true;
-          Kill(MI);
+          Kill(MI, Depth);
           break;
 
         case AMDGPU::S_BRANCH:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
index bec5cdf..4ab6a8a 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -1,13 +1,17 @@
 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
 
-; SI-LABEL: @kill_gs
-; SI: V_CMPX_LE_F32
+; SI-LABEL: @kill_gs_const
+; SI-NOT: V_CMPX_LE_F32
+; SI: S_MOV_B64 exec, 0
 
-define void @kill_gs() #0 {
+define void @kill_gs_const() #0 {
 main_body:
   %0 = icmp ule i32 0, 3
   %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
   call void @llvm.AMDGPU.kill(float %1)
+  %2 = icmp ule i32 3, 0
+  %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
+  call void @llvm.AMDGPU.kill(float %3)
   ret void
 }
 
-- 
1.9.0




More information about the llvm-commits mailing list