[llvm] [X86] Attempt to use VPMADD52L/VPMULUDQ instead of VPMULLQ on slow VPMULLQ targets (or when VPMULLQ is unavailable) (PR #171760)

Sat Dec 13 07:01:03 PST 2025

================
@@ -49926,6 +49873,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineMulToPMULDQ(N, DL, DAG, Subtarget))
     return V;
 
+  if (VT.getScalarType() == MVT::i64 && Subtarget.isPMULLQSlow()) {
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+
+    KnownBits Known0 = DAG.computeKnownBits(Op0);
+    KnownBits Known1 = DAG.computeKnownBits(Op1);
+    unsigned Count0 = Known0.countMinLeadingZeros();
+    unsigned Count1 = Known1.countMinLeadingZeros();
+
+    // Optimization 1: Use VPMULUDQ (32-bit multiply).
+    if (Count0 >= 32 && Count1 >= 32) {
+      return DAG.getNode(X86ISD::PMULUDQ, DL, VT, Op0, Op1);
+    }
+
+    // Optimization 1.5: Use PMULDQ (32-bit signed multiply).
+    unsigned Sign0 = DAG.ComputeNumSignBits(Op0);
+    unsigned Sign1 = DAG.ComputeNumSignBits(Op1);
+    if (Sign0 > 32 && Sign1 > 32) {
+      return DAG.getNode(X86ISD::PMULDQ, DL, VT, Op0, Op1);
+    }
+
+    // Optimization 2: Use VPMADD52L (52-bit multiply-add).
+    if (Subtarget.hasAVX512() && Subtarget.hasIFMA() &&
----------------
RKSimon wrote:

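What about hasAVXIFMA? Targets that have AVX-IFMA but not AVX512 also provide VPMADD52L (VEX-encoded, for 128-bit and 256-bit vectors), so this check should probably accept them as well.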

https://github.com/llvm/llvm-project/pull/171760