[llvm] Perform bitreverse using AVX512 GFNI for i32 and i64. (PR #81764)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 15 01:28:54 PST 2024


================
@@ -31040,17 +31044,63 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
   return DAG.getBitcast(VT, Res);
 }
 
+static auto createBSWAPShuffleMask(EVT VT) {
+  SmallVector<int, 16> ShuffleMask;
+  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+      ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+
+  return ShuffleMask;
+}
+
 static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
+  SDValue In = Op.getOperand(0);
+  SDLoc DL(Op);
+
+  auto HasGFNI = Subtarget.hasGFNI();
+  auto ScalarType = VT.getScalarType();
+
+  if (HasGFNI && ((ScalarType == MVT::i32) || (ScalarType == MVT::i64))) {
+    if (VT.isVector()) {
+      SmallVector<int, 16> BSWAPMask = createBSWAPShuffleMask(VT);
+      EVT ByteVT =
+          EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+      SDValue VecShuffle = DAG.getVectorShuffle(
+          ByteVT, DL, DAG.getNode(ISD::BITCAST, DL, ByteVT, In),
+          DAG.getUNDEF(ByteVT), BSWAPMask);
+      SDValue BitReverse = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, VecShuffle);
+      return DAG.getBitcast(VT, BitReverse);
+    } else {
+      auto CastTo = ScalarType == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
+      SDValue ScalarToVector =
+          DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, CastTo, In);
+      SDValue BitReverse =
+          DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
+                      DAG.getBitcast(MVT::v16i8, ScalarToVector));
+      SDValue ExtractElementZero = DAG.getNode(
+          ISD::EXTRACT_VECTOR_ELT, DL, ScalarType,
+          DAG.getBitcast(CastTo, BitReverse), DAG.getIntPtrConstant(0, DL));
+      return DAG.getNode(ISD::BSWAP, DL, ScalarType, ExtractElementZero);
+    }
+  }
 
   if (Subtarget.hasXOP() && !VT.is512BitVector())
     return LowerBITREVERSE_XOP(Op, DAG);
 
   assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
 
-  SDValue In = Op.getOperand(0);
-  SDLoc DL(Op);
+  assert(VT.getScalarType() == MVT::i8 &&
+         "Only byte vector BITREVERSE supported");
+
+  // Split v64i8 without BWI so that we can still use the PSHUFB lowering.
+
+  if (Subtarget.hasXOP() && !VT.is512BitVector())
+    return LowerBITREVERSE_XOP(Op, DAG);
+
+  assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
----------------
phoebewang wrote:

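Doesn't this duplicate line 31090? The `Subtarget.hasXOP() && !VT.is512BitVector()` check and the `Subtarget.hasSSSE3()` assert already appear earlier in this hunk.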

https://github.com/llvm/llvm-project/pull/81764


More information about the llvm-commits mailing list