[llvm] [LLVM][CodeGen][SVE] Add lowering for ISD::[ANY,SIGN,ZERO]_EXTEND_VECTOR_INREG. (PR #169847)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 27 10:04:54 PST 2025
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/169847
The rationale behind the change is to reduce the number of target specific nodes that exist prior to operation legalisation. This is the change to ReplaceExtractSubVectorResults, with everything else fixing the fallout from that change.
>From d3e39ed1813c9ed3b6524c3427f76554992c28e5 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 27 Nov 2025 15:44:26 +0000
Subject: [PATCH] [LLVM][CodeGen][SVE] Add lowering for
ISD::[ANY,SIGN,ZERO]_EXTEND_VECTOR_INREG.
The rationale behind the change is to reduce the number of target
specific nodes that exist prior to operation legalisation. This is the
change to ReplaceExtractSubVectorResults, with everything else fixing
the fallout from that change.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 48 ++++++-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 +
llvm/test/CodeGen/AArch64/sve-sext-zext.ll | 128 ++++++++++++++++++
3 files changed, 176 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dd70d729ffc91..e8aee2f7eb8f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1591,6 +1591,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AVGCEILS, VT, Custom);
setOperationAction(ISD::AVGCEILU, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
if (!Subtarget->isLittleEndian())
setOperationAction(ISD::BITCAST, VT, Custom);
@@ -7851,6 +7855,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return LowerEXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -14688,6 +14695,40 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}
+SDValue
+AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() && "Unexpected result type!");
+
+ bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
+ unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+
+ // Repeatedly unpack Val until the result is of the desired type.
+ SDValue Val = Op.getOperand(0);
+ switch (Val.getSimpleValueType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::nxv16i8:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
+ if (VT == MVT::nxv8i16)
+ break;
+ [[fallthrough]];
+ case MVT::nxv8i16:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
+ if (VT == MVT::nxv4i32)
+ break;
+ [[fallthrough]];
+ case MVT::nxv4i32:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
+ assert(VT == MVT::nxv2i64 && "Unexpected result type!");
+ break;
+ }
+
+ return Val;
+}
+
// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
// but we don't have an appropriate instruction,
// so custom-lower it as ZIP1-with-zeros.
@@ -14696,6 +14737,10 @@ AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+
+ if (VT.isScalableVector())
+ return LowerEXTEND_VECTOR_INREG(Op, DAG);
+
SDValue SrcOp = Op.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
@@ -28872,7 +28917,8 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
- unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
+ unsigned Opcode = (Index == 0) ? (unsigned)ISD::ANY_EXTEND_VECTOR_INREG
+ : (unsigned)AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ca08eb40c956a..b49e5e29ebe74 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -714,6 +714,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
index 88e13ea1e0fa4..845628a91498b 100644
--- a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -456,3 +456,131 @@ define <vscale x 2 x i64> @zext_i18_i64(<vscale x 2 x i18> %a) {
%r = zext <vscale x 2 x i18> %a to <vscale x 2 x i64>
ret <vscale x 2 x i64> %r
}
+
+define <vscale x 8 x i16> @sext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i16_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %sext = sext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %sext
+}
+
+define <vscale x 4 x i32> @sext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i32_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %sext = sext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %sext
+}
+
+define <vscale x 4 x i32> @sext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sext_inreg_i32_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
+ %sext = sext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %sext = sext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
+ %sext = sext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
+ %sext = sext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 8 x i16> @zext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i16_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %zext = zext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %zext
+}
+
+define <vscale x 4 x i32> @zext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i32_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %zext = zext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %zext
+}
+
+define <vscale x 4 x i32> @zext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: zext_inreg_i32_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
+ %zext = zext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
+ %zext = zext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
+ %zext = zext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
+ %zext = zext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %zext
+}
More information about the llvm-commits
mailing list