[llvm] [X86] Use GFNI for LZCNT vXi8 ops (PR #141888)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 02:17:16 PDT 2025
================
@@ -28998,6 +28998,35 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
+static SDValue LowerVectorCTLZ_GFNI(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Input = Op.getOperand(0);
+
+ if (!VT.isVector() || VT.getVectorElementType() != MVT::i8)
+ return SDValue();
+ SmallVector<SDValue, 16> MatrixVals;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ uint8_t mask = 1 << (7 - (i % 8));
+ MatrixVals.push_back(DAG.getConstant(mask, dl, MVT::i8));
+ }
+
+ SDValue Matrix = DAG.getBuildVector(VT, dl, MatrixVals);
+ SDValue Reversed = DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, Input, Matrix,
+ DAG.getTargetConstant(0, dl, MVT::i8));
+ SDValue AddMask = DAG.getConstant(0xFF, dl, MVT::i8);
+
+ SDValue AddVec = DAG.getSplatBuildVector(VT, dl, AddMask);
+ SDValue Summed = DAG.getNode(ISD::ADD, dl, VT, Reversed, AddVec);
+ SDValue NotSummed = DAG.getNode(ISD::XOR, dl, VT, Summed, AddVec);
+ SDValue Filtered = DAG.getNode(ISD::AND, dl, VT, NotSummed, Reversed);
+ SDValue FinalMatrix = DAG.getBuildVector(VT, dl, MatrixVals);
----------------
RKSimon wrote:
This looks like you've reused the BITREVERSE matrix instead of creating a CTTZ matrix?
It should be something like:
```
MVT VT64 = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue CTTZMatrix = DAG.getBitcast(VT, DAG.getConstant(0xAACCF0FF00000000ULL, dl, VT64));
```
(It might be worth adding this to getGFNICtrlImm/getGFNICtrlMask?)
https://github.com/llvm/llvm-project/pull/141888
More information about the llvm-commits
mailing list