[llvm] [X86] Use an FP-based expansion for v4i32 ctlz on SSE2-only targets (PR #167034)

Mon Nov 10 03:43:03 PST 2025

================
@@ -9480,6 +9481,55 @@ SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
 }
 
+
+SDValue TargetLowering::expandCTLZWithFP(SDNode *Node, SelectionDAG &DAG) const {
+  // pseudocode :
+  // if(x==0) return 32;
+  // float f = (float) x;
+  // int i = bitcast<int>(f);
+  // int ilog2 = (i >> 23) - 127;
+  // return 31 - ilog2;
+
+  SDLoc dl(Node);
+  SDValue Op = Node->getOperand(0);
+  EVT VT = Op.getValueType();
+
+  assert(VT.isVector() && "This expansion is intended for vectors");
+
+  EVT EltVT = VT.getVectorElementType();
+  EVT FloatVT, CmpVT;
+  unsigned BitWidth, MantissaBits, ExponentBias;
+
+  // Converting to float type
+  if (EltVT == MVT::i32) {
+    FloatVT = VT.changeVectorElementType(MVT::f32);
+    const fltSemantics &Sem = FloatVT.getVectorElementType().getFltSemantics();
+    BitWidth = EltVT.getSizeInBits();
+    MantissaBits = APFloat::semanticsPrecision(Sem) - 1;
+    ExponentBias =
+        static_cast<unsigned>(-APFloat::semanticsMinExponent(Sem) + 1);
+  } 
+  else {
+    return SDValue();
+  }
+
+  // Handling the case for when Op == 0 which is stored in ZeroRes
+  CmpVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
+  SDValue IsZero = DAG.getSetCC(dl, CmpVT, Op, Zero, ISD::SETEQ);
+  SDValue ZeroRes = DAG.getConstant(BitWidth, dl, VT);
----------------
jayfoad wrote:

You can omit the `Op == 0` handling for `CTLZ_ZERO_UNDEF`.

https://github.com/llvm/llvm-project/pull/167034