[llvm] [x86] Add lowering for `@llvm.experimental.vector.compress` (PR #104904)

Fri Aug 30 03:36:54 PDT 2024

================
@@ -17778,6 +17784,71 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
   llvm_unreachable("Unimplemented!");
 }
 
+// As legal vpcompress instructions depend on various AVX512 extensions, try to
+// convert illegal vector sizes to legal ones to avoid expansion.
+static SDValue lowerVECTOR_COMPRESS(SDValue Op, const X86Subtarget &Subtarget,
+                                    SelectionDAG &DAG) {
+  assert(Subtarget.hasAVX512() &&
+         "Need AVX512 for custom VECTOR_COMPRESS lowering.");
+
+  SDLoc DL(Op);
+  SDValue Vec = Op.getOperand(0);
+  SDValue Mask = Op.getOperand(1);
+  SDValue Passthru = Op.getOperand(2);
+
+  EVT VecVT = Vec.getValueType();
+  EVT ElementVT = VecVT.getVectorElementType();
+  unsigned NumElements = VecVT.getVectorNumElements();
+  unsigned NumVecBits = VecVT.getFixedSizeInBits();
+  unsigned NumElementBits = ElementVT.getFixedSizeInBits();
+
+  // 128- and 256-bit vectors with <= 16 elements can be converted to and
+  // compressed as 512-bit vectors in AVX512F.
+  if (NumVecBits != 128 && NumVecBits != 256)
+    return SDValue();
+
+  if (NumElementBits == 32 || NumElementBits == 64) {
+    unsigned NumLargeElements = 512 / NumElementBits;
+    EVT LargeVecVT =
+        MVT::getVectorVT(ElementVT.getSimpleVT(), NumLargeElements);
+    EVT LargeMaskVT = MVT::getVectorVT(MVT::i1, NumLargeElements);
+
+    SDValue InsertPos = DAG.getConstant(0, DL, MVT::i64);
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, LargeVecVT,
+                      DAG.getUNDEF(LargeVecVT), Vec, InsertPos);
+    Mask = DAG.getNode(
+        ISD::INSERT_SUBVECTOR, DL, LargeMaskVT,
+        DAG.getSplatVector(LargeMaskVT, DL, DAG.getConstant(0, DL, MVT::i1)),
+        Mask, InsertPos);
----------------
RKSimon wrote:

You can use the widenSubVector helper functions to insert the subvectors into wider zero/undef vectors.

https://github.com/llvm/llvm-project/pull/104904