[llvm] baa4983 - [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (#173256)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 24 06:26:17 PST 2025
Author: Benjamin Maxwell
Date: 2025-12-24T14:26:13Z
New Revision: baa49835da4d564bd7310474751c521bdf524d5b
URL: https://github.com/llvm/llvm-project/commit/baa49835da4d564bd7310474751c521bdf524d5b
DIFF: https://github.com/llvm/llvm-project/commit/baa49835da4d564bd7310474751c521bdf524d5b.diff
LOG: [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (#173256)
This is a follow-up to #171162, which broke the (untested) lowering of
v4i16/f16 to SVE.
See: https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-vector-compress.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22c5f7dffa80d..817b00206561d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -191,6 +191,8 @@ class VectorLegalizer {
void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
bool NonArithmetic);
+ void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -705,6 +707,33 @@ void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteVECTOR_COMPRESS(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
+ assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
+ "Only integer promotion or bitcasts between types is supported");
+
+ SDValue Vec = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue Passthru = Node->getOperand(2);
+ if (VT.isInteger()) {
+ Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
+ Mask = TLI.promoteTargetBoolean(DAG, Mask, PromotedVT);
+ Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
+ } else {
+ Vec = DAG.getBitcast(PromotedVT, Vec);
+ Passthru = DAG.getBitcast(PromotedVT, Passthru);
+ }
+
+ SDValue Result =
+ DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
+ Result = VT.isInteger() ? DAG.getNode(ISD::TRUNCATE, DL, VT, Result)
+ : DAG.getBitcast(VT, Result);
+ Results.push_back(Result);
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -745,6 +774,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMINIMUM:
PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
return;
+ case ISD::VECTOR_COMPRESS:
+ PromoteVECTOR_COMPRESS(Node, Results);
+ return;
+
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 30e2a7b890c6d..39d7674671876 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2003,6 +2003,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::v2f64})
setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
+ // Promote v4i16/f16 to v4i32/f32 as the SVE container for v4i16 is nxv8,
+ // which is not supported for compact (with only +sve).
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4bf16, MVT::v4i16);
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4f16, MVT::v4i16);
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4i16, MVT::v4i32);
+
for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
MVT::nxv4i32, MVT::nxv4f32}) {
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index fc46460cef758..ece7bef54ddea 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -205,6 +205,54 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
ret <4 x i32> %out
}
+define <4 x i16> @test_compress_v4i16_with_sve(<4 x i16> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4i16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x i16> @llvm.experimental.vector.compress(<4 x i16> %vec, <4 x i1> %mask, <4 x i16> poison)
+ ret <4 x i16> %out
+}
+
+define <4 x half> @test_compress_v4f16_with_sve(<4 x half> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4f16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x half> @llvm.experimental.vector.compress(<4 x half> %vec, <4 x i1> %mask, <4 x half> poison)
+ ret <4 x half> %out
+}
+
+define <4 x bfloat> @test_compress_v4bf16_with_sve(<4 x bfloat> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4bf16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x bfloat> @llvm.experimental.vector.compress(<4 x bfloat> %vec, <4 x i1> %mask, <4 x bfloat> poison)
+ ret <4 x bfloat> %out
+}
+
define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
; CHECK-LABEL: test_compress_v1i32_with_sve:
; CHECK: // %bb.0:
More information about the llvm-commits
mailing list