[llvm] [RISCV] Pack build_vectors into largest available element type (PR #97351)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 14:58:11 PDT 2024
================
@@ -3896,6 +3896,66 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+/// Double the element size of the build vector to reduce the number
+/// of vslide1down operations in the build vector chain. In the worst
+/// case, this trades three scalar operations for one vector operation.
+/// Scalar operations are generally lower latency, and for out-of-order
+/// cores we also benefit from additional parallelism.
+static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
+ MVT ElemVT = VT.getVectorElementType();
+ if (!ElemVT.isInteger())
+ return SDValue();
+
+ // TODO: Relax these architectural restrictions, possibly with costing
+ // of the actual instructions required.
+ if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned ElemSizeInBits = ElemVT.getSizeInBits();
+ if (ElemSizeInBits >= Subtarget.getELen() || NumElts % 2 != 0)
+ return SDValue();
+
+ // Produce [B,A] packed into a type twice as wide. Note that all
+ // scalars are XLenVT, possibly masked (see below).
+ MVT XLenVT = Subtarget.getXLenVT();
+ auto pack = [&](SDValue A, SDValue B) {
+ // Bias the scheduling of the inserted operations to near the
+ // definition of the element - this tends to reduce register
+ // pressure overall.
+ SDLoc ElemDL(B);
+ SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
+ return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
+ DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt));
+ };
+
+ SDValue Mask = DAG.getConstant(
+ APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
+ SmallVector<SDValue> NewOperands;
+ NewOperands.reserve(NumElts / 2);
+ for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2) {
+ SDValue A = Op.getOperand(i);
+ SDValue B = Op.getOperand(i + 1);
+ if (ElemVT != XLenVT) {
----------------
preames wrote:
I think you're right, this condition is just dead. Will remove.
https://github.com/llvm/llvm-project/pull/97351
More information about the llvm-commits mailing list