[llvm] [X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (PR #126448)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 06:12:24 PST 2025
João Gouveia <jtalonegouveia at gmail.com>,
João Gouveia <jtalonegouveia at gmail.com>,
João Gouveia <jtalonegouveia at gmail.com>,
João Gouveia <jtalonegouveia at gmail.com>,
João Gouveia <jtalonegouveia at gmail.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/126448 at github.com>
================
@@ -53600,6 +53600,40 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
DAG.getUNDEF(SrcVT)));
}
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
+// to avoid generating code with MOVABS and large constants in certain cases.
+static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ using namespace llvm::SDPatternMatch;
+
+ SDValue AddLhs;
+ APInt AddConst, SrlConst;
+ if (VT != MVT::i32 ||
+ !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
+ m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
+ m_ConstInt(AddConst))),
+ m_ConstInt(SrlConst)))))
+ return SDValue();
+
+ if (!SrlConst.ugt(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
+ return SDValue();
+
+ SDValue AddLHSSrl =
+ DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+ APInt NewAddConstVal =
+ (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+ SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+ SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+
+ APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
+ SDValue CleanupSizeConst = DAG.getConstant(CleanupSizeConstVal, DL, VT);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewAddNode, CleanupSizeConst);
----------------
phoebewang wrote:
I think they are two sides of the same coin. If you believe MOVZ is unnecessary, then it means we can assume the high 16 bits are all zeros, but I don't know how to prove that. I used zext only because there is no anyext in the IR.
https://github.com/llvm/llvm-project/pull/126448
More information about the llvm-commits
mailing list