[llvm] 4af885c - [AArch64] Fix performZExtUZPCombine() DAG combine (#183765)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 05:28:28 PST 2026


Author: Gaëtan Bossu
Date: 2026-03-02T13:28:23Z
New Revision: 4af885c0c13c0cf1fe9aee554634020d801b4999

URL: https://github.com/llvm/llvm-project/commit/4af885c0c13c0cf1fe9aee554634020d801b4999
DIFF: https://github.com/llvm/llvm-project/commit/4af885c0c13c0cf1fe9aee554634020d801b4999.diff

LOG: [AArch64] Fix performZExtUZPCombine() DAG combine (#183765)

This used to look through all extract_subvector nodes, including those
extracting a fixed vector from a scalable one. One consequence is that
we could end up generating a NVCAST from a scalable vector to a fixed
one, even when vscale was unknown.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/zext-shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c9e216e766e9f..3f0915e9d0fcb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24297,10 +24297,16 @@ static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N,
   if (ExtOffset != 0 && ExtOffset != VT.getVectorNumElements())
     return SDValue();
 
-  EVT InVT = N->getOperand(0).getOperand(0).getValueType();
   auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0).getOperand(0));
-  if (!Shuffle ||
-      InVT.getVectorNumElements() != VT.getVectorNumElements() * 2 ||
+  if (!Shuffle)
+    return SDValue();
+
+  // From here it is safe to assume InVT is a fixed-length vector. The only
+  // legal scalable vector shuffle is splat, and it should have been lowered to
+  // vector_splat.
+  EVT InVT = N->getOperand(0).getOperand(0).getValueType();
+  assert(InVT.isFixedLengthVector() && "Unexpected scalable shufflevector.");
+  if (InVT.getVectorNumElements() != VT.getVectorNumElements() * 2 ||
       InVT.getScalarSizeInBits() * 2 != VT.getScalarSizeInBits())
     return SDValue();
 
@@ -24356,6 +24362,9 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
   if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
     ExtOffset = Op.getConstantOperandVal(1);
     Op = Op.getOperand(0);
+    // Avoid NVCAST from a scalable vector to a fixed-size one.
+    if (Op.getValueType().isScalableVector())
+      return SDValue();
   }
 
   unsigned Shift = 0;
@@ -24382,6 +24391,9 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
     if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
       ExtOffset = Op.getConstantOperandVal(1);
       Op = Op.getOperand(0);
+      // Avoid NVCAST from a scalable vector to a fixed-size one.
+      if (Op.getValueType().isScalableVector())
+        return SDValue();
     } else
       return SDValue();
   }

diff  --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
index a0d4e18acb6c8..f3db88af0787c 100644
--- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
@@ -684,3 +684,60 @@ define i16 @undeftop(<8 x i16> %0) {
   %4 = extractelement <8 x i16> %last, i32 0
   ret i16 %4
 }
+
+; Negative test for performZExtUZPCombine().
+; Do not combine away extracts from scalable vectors.
+define <4 x i32> @prevent_scalable_nvcast(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: prevent_scalable_nvcast:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+entry:
+  %v.even = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %v,
+                                                                   <vscale x 8 x i16> %v)
+  %0 = call <4 x i16> @llvm.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16> %v.even, i64 0)
+  %1 = zext <4 x i16> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+; Negative test for performZExtUZPCombine().
+; Similar to the above. Scalable vectors are introduced due to the custom
+; legalisation of the wide shufflevector into a scalable AArch64ISD::UZP2
+; (because vscale is known to be 2).
+define <4 x i32> @prevent_wide_nvcast(<vscale x 8 x i16> %v) #1 {
+; CHECK-LABEL: prevent_wide_nvcast:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+entry:
+  %v.fixed = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> %v, i64 0)
+  %v.odd = shufflevector <16 x i16> %v.fixed, <16 x i16> %v.fixed, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
+                                                                               i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %0 = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> %v.odd, i64 0)
+  %1 = zext <4 x i16> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+; Negative test for performZExtUZPCombine().
+; The latter can also handle and/lsr in between the vector_extract and zext, so
+; make sure vector_extract from scalable vectors are also rejected there.
+define <4 x i32> @prevent_scalable_nvcast_mask(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: prevent_scalable_nvcast_mask:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+entry:
+  %v.even = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %v,
+                                                                   <vscale x 8 x i16> %v)
+  %0 = call <4 x i16> @llvm.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16> %v.even, i64 0)
+  %masked = and <4 x i16> %0, splat (i16 255)
+  %1 = zext <4 x i16> %masked to <4 x i32>
+  ret <4 x i32> %1
+}
+
+attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
+attributes #1 = { vscale_range(2,2) "target-features"="+sve" }


        


More information about the llvm-commits mailing list