[llvm] [SDAG] Fix incorrect usage of VECREDUCE_ADD (PR #171459)

Tue Dec 9 07:45:10 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: Benjamin Maxwell (MacDue)

<details>
<summary>Changes</summary>

The mask needs to be extended to `i32` before reducing, or the reduction can be incorrectly optimized to a VECREDUCE_XOR.

---
Full diff: https://github.com/llvm/llvm-project/pull/171459.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+2-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-vector-compress.ll (+13-12) 


``````````diff

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index da3102d30e153..1a82cdc2206e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2664,7 +2664,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo,
 
   // We store LoVec and then insert HiVec starting at offset=|1s| in LoMask.
   SDValue WideMask =
-      DAG.getNode(ISD::ZERO_EXTEND, DL, LoMask.getValueType(), LoMask);
+      DAG.getNode(ISD::ZERO_EXTEND, DL,
+                  LoMask.getValueType().changeElementType(MVT::i32), LoMask);
   SDValue Offset = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, WideMask);
   Offset = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Offset);
 
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index f700dee0fb2e4..cfd343e94baa4 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -145,17 +145,17 @@ define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p1.h, p0.b
 ; CHECK-NEXT:    cnth x9
-; CHECK-NEXT:    ptrue p2.s
-; CHECK-NEXT:    sub x9, x9, #1
 ; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    sub x9, x9, #1
+; CHECK-NEXT:    cntp x8, p1, p1.s
 ; CHECK-NEXT:    compact z0.s, p1, z0.s
-; CHECK-NEXT:    cntp x8, p2, p1.s
 ; CHECK-NEXT:    compact z1.s, p0, z1.s
-; CHECK-NEXT:    str z0, [sp]
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    str z0, [sp]
 ; CHECK-NEXT:    csel x8, x8, x9, lo
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1w { z1.s }, p2, [x9, x8, lsl #2]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9, x8, lsl #2]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #2
@@ -231,23 +231,24 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
 ; CHECK-NEXT:    sub sp, sp, #32
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
-; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movi v5.2s, #1
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ushll v3.2d, v2.2s, #0
 ; CHECK-NEXT:    ushll2 v4.2d, v2.4s, #0
-; CHECK-NEXT:    fmov x8, d2
+; CHECK-NEXT:    and v2.8b, v2.8b, v5.8b
 ; CHECK-NEXT:    shl v3.2d, v3.2d, #63
 ; CHECK-NEXT:    shl v4.2d, v4.2d, #63
-; CHECK-NEXT:    lsr x9, x8, #32
-; CHECK-NEXT:    eor w8, w8, w9
-; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    addp v2.2s, v2.2s, v2.2s
 ; CHECK-NEXT:    cmlt v3.2d, v3.2d, #0
 ; CHECK-NEXT:    cmlt v4.2d, v4.2d, #0
-; CHECK-NEXT:    and x8, x8, #0x3
-; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    fmov w8, s2
 ; CHECK-NEXT:    and z3.d, z3.d, #0x1
 ; CHECK-NEXT:    and z4.d, z4.d, #0x1
+; CHECK-NEXT:    and x8, x8, #0x3
+; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    cmpne p1.d, p0/z, z3.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z4.d, #0
 ; CHECK-NEXT:    compact z0.d, p1, z0.d

``````````

</details>


https://github.com/llvm/llvm-project/pull/171459