[llvm] [SelectionDAG] Use ExpandIntRes_CLMUL to expand vector CLMUL via narrower legal types (PR #184468)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 13:54:24 PST 2026
================
@@ -8456,22 +8456,153 @@ SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
+/// Return true if CLMUL on the fixed-length vector type VT is legal/custom, or
+/// can reach such a type through legal types by halving the element width
+/// (same element count) and/or doubling the element count. Callers use this to
+/// prefer the halving/widening expansion over the bit-by-bit one.
+///
+/// HalveDepth counts halving steps only (each creates ~4x more operations);
+/// more than 2 is rejected. TotalDepth counts every step (halving or widening)
+/// and is capped at 8, which bounds the recursion. Both default to 0.
+/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
+/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
+/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
+static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx,
+ EVT VT, unsigned HalveDepth = 0,
+ unsigned TotalDepth = 0) {
+ if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
+ return false; // Step budget exhausted, or not a fixed-length vector.
+ if (TLI.isOperationLegalOrCustom(ISD::CLMUL, VT))
+ return true; // Reached a type the target handles directly.
+ if (!TLI.isTypeLegal(VT))
+ return false; // Only search onward from legal types.
+
+ unsigned BW = VT.getScalarSizeInBits();
+
+ // Halve: halve element width, same element count (counts toward HalveDepth).
+ // This is the expensive step -- each halving creates ~4x more operations.
+ if (BW >= 16) { // Halved elements are never narrower than 8 bits.
+ EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
+ EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
+ if (TLI.isTypeLegal(HalfVT) &&
+ canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
+ return true;
+ }
+
+ // Widen: double element count, same element width; HalveDepth is unchanged.
+ // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
+ EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
+ if (TLI.isTypeLegal(WideVT) &&
+ canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
+ return true;
+
+ return false; // Neither halving nor widening reached a legal CLMUL.
+}
+
SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
SDLoc DL(Node);
EVT VT = Node->getValueType(0);
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
unsigned BW = VT.getScalarSizeInBits();
unsigned Opcode = Node->getOpcode();
+ LLVMContext &Ctx = *DAG.getContext();
switch (Opcode) {
case ISD::CLMUL: {
+ // For vector types, try decomposition strategies that leverage legal
+ // CLMUL on narrower or wider element types, avoiding the expensive
+ // bit-by-bit expansion.
+ if (VT.isVector()) {
+ // Strategy 1: Halving decomposition to half-element-width CLMUL.
+ // Applies ExpandIntRes_CLMUL's identity element-wise:
+ // CLMUL(X, Y) = (Hi << HalfBW) | Lo
+ // where:
+ // Lo = CLMUL(XLo, YLo)
+ // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
+ unsigned HalfBW = BW / 2;
+ if (HalfBW >= 8) {
+ EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
+ EVT HalfVT =
+ EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
+ if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
+ /*HalveDepth=*/1)) {
+ SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
+
+ // Extract low and high halves of each element.
+ SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
+ SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
+ DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
+ SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
+ SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
+ DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
+
+ // Lo = CLMUL(XLo, YLo)
+ SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
+
+ // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
+ SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
+ SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
+ SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
+ SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
+ SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
+
+ // Reassemble: Result = ZExt(Lo) | (ZExt(Hi) << HalfBW)
+ SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
+ SDValue HiExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi);
----------------
topperc wrote:
Can HiExt be ANY_EXTEND?
https://github.com/llvm/llvm-project/pull/184468
More information about the llvm-commits mailing list