[llvm] [X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (PR #126448)

João Gouveia via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 08:19:07 PST 2025


================
@@ -53600,6 +53600,40 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
                                  DAG.getUNDEF(SrcVT)));
 }
 
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
+// to avoid generating code with MOVABS and large constants in certain cases.
+static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
+                                     const SDLoc &DL) {
+  using namespace llvm::SDPatternMatch;
+
+  SDValue AddLhs;
+  APInt AddConst, SrlConst;
+  if (VT != MVT::i32 ||
+      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
+                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
+                                                m_ConstInt(AddConst))),
+                                 m_ConstInt(SrlConst)))))
+    return SDValue();
+
+  if (!SrlConst.ugt(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
+    return SDValue();
+
+  SDValue AddLHSSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+  APInt NewAddConstVal =
+      (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+
+  APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
+  SDValue CleanupSizeConst = DAG.getConstant(CleanupSizeConstVal, DL, VT);
+  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewAddNode, CleanupSizeConst);
----------------
joaotgouveia wrote:

I'm inclined to think that the MOVZ is unnecessary. This transformation with the AND does not use it https://godbolt.org/z/a5d5vhdsb, and my previous implementation of the fold (although less general) also did not generate any MOVZs. I'll change the implementation to use anyext.

https://github.com/llvm/llvm-project/pull/126448


More information about the llvm-commits mailing list