[llvm] [AArch64] Improve lowering of truncating build vectors (PR #81960)

Wed Feb 21 06:50:48 PST 2024

================
@@ -11369,54 +11369,105 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
   return true;
 }
 
-// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
-// v4i32s. This is really a truncate, which we can construct out of (legal)
-// concats and truncate nodes.
-static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
-  if (V.getValueType() != MVT::v16i8)
-    return SDValue();
-  assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
-
-  for (unsigned X = 0; X < 4; X++) {
-    // Check the first item in each group is an extract from lane 0 of a v4i32
-    // or v4i16.
-    SDValue BaseExt = V.getOperand(X * 4);
-    if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        (BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
-         BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
-        !isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
-        BaseExt.getConstantOperandVal(1) != 0)
+// Detect patterns like a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3, that
+// are truncates, which we can construct out of (legal) concats and truncate
+// nodes.
+static SDValue ReconstructTruncateFromBuildVector(SDValue V,
+                                                  SelectionDAG &DAG) {
+  EVT BVTy = V.getValueType();
+  if (BVTy != MVT::v16i8 && BVTy != MVT::v8i16 && BVTy != MVT::v8i8 &&
+      BVTy != MVT::v4i16)
+    return SDValue();
+
+  // Only handle truncating BVs.
+  if (V.getOperand(0).getValueType().getSizeInBits() ==
----------------
david-arm wrote:

nit: Can you call `getFixedSizeInBits()` here, since you know it should be a scalar? This is better than calling `getSizeInBits()`, which relies on the implicit conversion operator to turn the returned `TypeSize` object into a `uint64_t`. It's also faster, since that conversion operator is not inlined.

https://github.com/llvm/llvm-project/pull/81960