[llvm] [LLVM] Make use of s_flbit_i32_b64 and s_ff1_i32_b64 (PR #75158)

Thu Dec 14 07:53:07 PST 2023

================
@@ -3053,17 +3053,21 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
 
   bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
                    Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+  bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
 
-  if (Src.getValueType() == MVT::i32) {
+  if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
     // (ctlz hi:lo) -> (umin (ffbh src), 32)
     // (cttz hi:lo) -> (umin (ffbl src), 32)
     // (ctlz_zero_undef src) -> (ffbh src)
     // (cttz_zero_undef src) -> (ffbl src)
+    // The 64-bit scalar version produces a 32-bit result
     SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
     if (!ZeroUndef) {
       const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
       NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
     }
+    if (Is64BitScalar)
+      return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
     return NewOpr;
----------------
jayfoad wrote:

I think ZERO_EXTEND to the same type is optimized out as a no-op, so you could write this more simply:
```suggestion
    return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
```

https://github.com/llvm/llvm-project/pull/75158