[llvm] wasm: recognize `any_true` and `all_true` (PR #155885)

Sat Aug 30 12:24:35 PDT 2025

folkertdev wrote:

I've now implemented a manual combine between `setcc` and `vecreduce_{and, or}`. That works really well in most cases, but it changes the expected output of some existing tests.

This seems acceptable to me:

```diff
 ; SIMD128-LABEL: pairwise_or_v2i64:
 ; SIMD128:         .functype pairwise_or_v2i64 (v128) -> (i64)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT:    v128.or $push1=, $0, $pop0
-; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
+; SIMD128-NEXT:    i64x2.extract_lane $push1=, $0, 0
+; SIMD128-NEXT:    i64x2.extract_lane $push0=, $0, 1
+; SIMD128-NEXT:    i64.or $push2=, $pop1, $pop0
 ; SIMD128-NEXT:    return $pop2
   %res = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %arg)
   ret i64 %res
```

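But in some cases it looks like the previous implementation was a lot smarter about the vector reduction than the default lowering. In the example below, the old output reduced the vector with three `i8x16.shuffle` + `v128.or` steps and a single lane extract, whereas the new output extracts all eight lanes and combines them with seven scalar `i32.or` instructions.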

```diff
 ; SIMD128-LABEL: pairwise_or_v8i16:
 ; SIMD128:         .functype pairwise_or_v8i16 (v128) -> (i32)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT:    v128.or $push8=, $0, $pop0
-; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
-; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT:    v128.or $push6=, $pop7, $pop1
-; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
-; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT:    v128.or $push3=, $pop5, $pop2
-; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
-; SIMD128-NEXT:    return $pop4
+; SIMD128-NEXT:    i16x8.extract_lane_u $push1=, $0, 0
+; SIMD128-NEXT:    i16x8.extract_lane_u $push0=, $0, 1
+; SIMD128-NEXT:    i32.or $push2=, $pop1, $pop0
+; SIMD128-NEXT:    i16x8.extract_lane_u $push3=, $0, 2
+; SIMD128-NEXT:    i32.or $push4=, $pop2, $pop3
+; SIMD128-NEXT:    i16x8.extract_lane_u $push5=, $0, 3
+; SIMD128-NEXT:    i32.or $push6=, $pop4, $pop5
+; SIMD128-NEXT:    i16x8.extract_lane_u $push7=, $0, 4
+; SIMD128-NEXT:    i32.or $push8=, $pop6, $pop7
+; SIMD128-NEXT:    i16x8.extract_lane_u $push9=, $0, 5
+; SIMD128-NEXT:    i32.or $push10=, $pop8, $pop9
+; SIMD128-NEXT:    i16x8.extract_lane_u $push11=, $0, 6
+; SIMD128-NEXT:    i32.or $push12=, $pop10, $pop11
+; SIMD128-NEXT:    i16x8.extract_lane_u $push13=, $0, 7
+; SIMD128-NEXT:    i32.or $push14=, $pop12, $pop13
+; SIMD128-NEXT:    return $pop14
   %res = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %arg)
   ret i16 %res
 }
```

What is the best way forward here?

https://github.com/llvm/llvm-project/pull/155885