[llvm] [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (PR #173256)

Mon Dec 22 04:58:29 PST 2025

https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/173256

This is a follow-up to #171162, which broke the (untested) lowering of v4i16/f16 to SVE.

See https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376

>From df2a65094b580ab1b7adf7c2bc744172f56c1471 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 22 Dec 2025 12:49:33 +0000
Subject: [PATCH] [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to
 SVE

This is a follow-up to #171162, which broke the (untested) lowering
of v4i16/f16 to SVE.

See https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376
---
 .../SelectionDAG/LegalizeVectorOps.cpp        | 33 +++++++++++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  5 +++
 .../CodeGen/AArch64/sve-vector-compress.ll    | 32 ++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22c5f7dffa80d..0cb45ea93346e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -191,6 +191,8 @@ class VectorLegalizer {
   void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                              bool NonArithmetic);
 
+  void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
 public:
   VectorLegalizer(SelectionDAG& dag) :
       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -705,6 +707,33 @@ void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
   Results.push_back(Res);
 }
 
+void VectorLegalizer::PromoteVECTOR_COMPRESS(
+    SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+  MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
+  assert(PromotedVT.isInteger() && "Only promotion to integers supported");
+
+  SDValue Vec = Node->getOperand(0);
+  SDValue Mask = TLI.promoteTargetBoolean(DAG, Node->getOperand(1), PromotedVT);
+  SDValue Passthru = Node->getOperand(2);
+  if (VT.isFloatingPoint()) {
+    Vec = DAG.getBitcast(IntVT, Vec);
+    Passthru = DAG.getBitcast(IntVT, Passthru);
+  }
+  Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
+  Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
+
+  SDValue Result =
+      DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
+  Result = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Result);
+
+  if (VT.isFloatingPoint())
+    Result = DAG.getBitcast(VT, Result);
+  Results.push_back(Result);
+}
+
 void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   // For a few operations there is a specific concept for promotion based on
   // the operand's type.
@@ -745,6 +774,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   case ISD::VECREDUCE_FMINIMUM:
     PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
     return;
+  case ISD::VECTOR_COMPRESS:
+    PromoteVECTOR_COMPRESS(Node, Results);
+    return;
+
   case ISD::FP_ROUND:
   case ISD::FP_EXTEND:
     // These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c56071c5527d..41234566dcf01 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2003,6 +2003,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                     MVT::v2f64})
       setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
 
+    // Promote v4i16/f16 to v4i32/f32 as the SVE container for v4i16 is nxv8,
+    // which is not supported with for compact (with only +sve).
+    setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4i16, MVT::v4i32);
+    setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4f16, MVT::v4i32);
+
     for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
                     MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
                     MVT::nxv4i32, MVT::nxv4f32}) {
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index fc46460cef758..034db474d9a41 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -205,6 +205,38 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
     ret <4 x i32> %out
 }
 
+define <4 x i16> @test_compress_v4i16_with_sve(<4 x i16> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4i16_with_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4h, v1.4h, #15
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+    %out = call <4 x i16> @llvm.experimental.vector.compress(<4 x i16> %vec, <4 x i1> %mask, <4 x i16> poison)
+    ret <4 x i16> %out
+}
+
+define <4 x half> @test_compress_v4f16_with_sve(<4 x half> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4f16_with_sve:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4h, v1.4h, #15
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+    %out = call <4 x half> @llvm.experimental.vector.compress(<4 x half> %vec, <4 x i1> %mask, <4 x half> poison)
+    ret <4 x half> %out
+}
+
 define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
 ; CHECK-LABEL: test_compress_v1i32_with_sve:
 ; CHECK:       // %bb.0: