[Openmp-commits] [libcxxabi] [llvm] [compiler-rt] [openmp] [mlir] [libcxx] [clang-tools-extra] [flang] [clang] [libc] [AArch64] Add custom lowering for load <3 x i8>. (PR #78632)

Tim Northover via Openmp-commits openmp-commits at lists.llvm.org
Mon Jan 29 07:09:10 PST 2024


================
@@ -11012,6 +11012,50 @@ SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG) {
       MaskSourceVec);
 }
 
+// Check if Op is a BUILD_VECTOR with 2 extracts and a load that is cheaper to
+// insert into a vector and use a shuffle. This improves lowering for loads of
+// <3 x i8>.
+static SDValue shuffleWithSingleLoad(SDValue Op, SelectionDAG &DAG) {
+  if (Op.getNumOperands() != 4 || Op.getValueType() != MVT::v4i16)
+    return SDValue();
+
+  SDValue V0 = Op.getOperand(0);
+  SDValue V1 = Op.getOperand(1);
+  SDValue V2 = Op.getOperand(2);
+  SDValue V3 = Op.getOperand(3);
+  if (V0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      V1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      V2.getOpcode() != ISD::LOAD ||
+      !(V3.isUndef() || V3.getOpcode() == ISD::EXTRACT_VECTOR_ELT))
+    return SDValue();
+
+  if (V0.getOperand(0) != V1.getOperand(0) ||
+      V0.getConstantOperandVal(1) != 0 || V1.getConstantOperandVal(1) != 1 ||
+      !(V3.isUndef() || V3.getConstantOperandVal(1) == 3))
+    return SDValue();
+
+  SDLoc dl(Op);
+  auto *L = cast<LoadSDNode>(Op.getOperand(2));
+  auto Vec = V0.getOperand(0);
+
+  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Vec.getValueType(), Vec,
+                    SDValue(L, 0), DAG.getConstant(2, dl, MVT::i64));
+  Vec = DAG.getNode(ISD::BITCAST, dl, MVT::v4i16, Vec);
----------------
TNorthover wrote:

I think `Vec` could have quite a variety of unexpected types here (though running at a specific phase of the SelectionDAG pipeline might limit that). The checks so far only constrain the type of `Op` itself, not the type of the vector the extracts read from, so there's no reason to expect `Vec` to have 4 elements, or each of its elements to be `i16`.

https://github.com/llvm/llvm-project/pull/78632


More information about the Openmp-commits mailing list