[PATCH v2 2/6] R600: Implement zero undef variants of ctlz/cttz

Jan Vesely jan.vesely at rutgers.edu
Fri Jun 13 08:24:34 PDT 2014


v2: use the FFBH/FFBL instructions when the subtarget has them (hasFFBH()/hasFFBL()); otherwise expand

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
 lib/Target/R600/AMDGPUISelLowering.cpp   | 11 +++++++++++
 lib/Target/R600/AMDGPUSubtarget.h        |  8 ++++++++
 lib/Target/R600/EvergreenInstructions.td |  3 +++
 lib/Target/R600/SIInstructions.td        | 10 ++++++++++
 4 files changed, 32 insertions(+)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 92ab174..2e162c7 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -245,6 +245,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   if (!Subtarget->hasBCNT(64))
     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
 
+  if (!Subtarget->hasFFBH())
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+
+  if (!Subtarget->hasFFBL())
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+
   MVT VTs[] = { MVT::i32, MVT::i64 };
   for (MVT VT : VTs) {
     setOperationAction(ISD::CTTZ, VT, Expand);
@@ -278,6 +287,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     setOperationAction(ISD::CTPOP, VT, Expand);
     setOperationAction(ISD::CTTZ, VT, Expand);
     setOperationAction(ISD::CTLZ, VT, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
   }
 
   static const MVT::SimpleValueType FloatTypes[] = {
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index e1b5b33..fc6682e 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -97,6 +97,14 @@ public:
             hasCaymanISA());
   }
 
+  bool hasFFBL() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasFFBH() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
   bool IsIRStructurizerEnabled() const;
   bool isIfCvtEnabled() const;
   unsigned getWavefrontSize() const;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 28725ff..a1c509a 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -328,6 +328,9 @@ defm CUBE_eg : CUBE_Common<0xC0>;
 
 def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
 
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
+
 let hasSideEffects = 1 in {
   def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
 }
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index acdbc9b..c8a6e1f 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2705,6 +2705,16 @@ def : Pat <
     (S_MOV_B32 0), sub1)
 >;
 
+def : Pat <
+  (i32 (ctlz_zero_undef i32:$val)),
+  (V_FFBH_U32_e32 $val)
+>;
+
+def : Pat <
+  (i32 (cttz_zero_undef i32:$val)),
+  (V_FFBL_B32_e32 $val)
+>;
+
 //============================================================================//
 // Miscellaneous Optimization Patterns
 //============================================================================//
-- 
1.9.3




More information about the llvm-commits mailing list