[Openmp-commits] [libcxxabi] [llvm] [compiler-rt] [openmp] [mlir] [libcxx] [clang-tools-extra] [flang] [clang] [libc] [AArch64] Add custom lowering for load <3 x i8>. (PR #78632)
Tim Northover via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jan 29 07:09:10 PST 2024
================
@@ -11012,6 +11012,50 @@ SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG) {
MaskSourceVec);
}
+// Check if Op is a BUILD_VECTOR with 2 extracts and a load that is cheaper to
+// insert into a vector and use a shuffle. This improves lowering for loads of
+// <3 x i8>.
+static SDValue shuffleWithSingleLoad(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getNumOperands() != 4 || Op.getValueType() != MVT::v4i16)
+ return SDValue();
+
+ SDValue V0 = Op.getOperand(0);
+ SDValue V1 = Op.getOperand(1);
+ SDValue V2 = Op.getOperand(2);
+ SDValue V3 = Op.getOperand(3);
+ if (V0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ V1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ V2.getOpcode() != ISD::LOAD ||
+ !(V3.isUndef() || V3.getOpcode() == ISD::EXTRACT_VECTOR_ELT))
+ return SDValue();
+
+ if (V0.getOperand(0) != V1.getOperand(0) ||
+ V0.getConstantOperandVal(1) != 0 || V1.getConstantOperandVal(1) != 1 ||
+ !(V3.isUndef() || V3.getConstantOperandVal(1) == 3))
+ return SDValue();
+
+ SDLoc dl(Op);
+ auto *L = cast<LoadSDNode>(Op.getOperand(2));
+ auto Vec = V0.getOperand(0);
+
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Vec.getValueType(), Vec,
+ SDValue(L, 0), DAG.getConstant(2, dl, MVT::i64));
+ Vec = DAG.getNode(ISD::BITCAST, dl, MVT::v4i16, Vec);
----------------
TNorthover wrote:
I think `Vec` could have quite a variety of unexpected types here (though running at a specific phase of DAG processing might limit that). Nothing you've checked so far guarantees that it has 4 elements or that each element is `i16`, so inserting into it at index 2 and bitcasting the result to `v4i16` isn't known to be valid.
https://github.com/llvm/llvm-project/pull/78632
More information about the Openmp-commits mailing list