[llvm] [WebAssembly] Fix operand order in performBitcastCombine (PR #190361)
Zile Xiong via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 00:32:23 PDT 2026
https://github.com/xiongzile updated https://github.com/llvm/llvm-project/pull/190361
>From 373cb954d51980571b08f44e7ba1d5964390320a Mon Sep 17 00:00:00 2001
From: Zile Xiong <xiongzile99 at gmail.com>
Date: Sat, 4 Apr 2026 00:36:51 +0800
Subject: [PATCH] [WebAssembly] Fix operand order in performBitcastCombine for
wide <N x i1> -> iN bitmask reconstruction
In performBitcastCombine, when reconstructing an i32/i64 bitmask from
multiple v16i1 SetCC results (for the N=32 and N=64 cases), the code was
building SHL nodes with reversed operands:
SHL(16, ReturningInteger) // wrong
SelectionDAG::getNode(ISD::SHL, ...) expects:
- operand 0: value to be shifted
- operand 1: shift amount
This produced incorrect DAGs such as `shl Constant<16>, xxx`, leading
to wrong codegen for vector bitmask patterns.
Fixed by swapping the operands to the correct order:
SHL(ReturningInteger, 16)
Fixes: https://github.com/llvm/llvm-project/issues/190358
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 2 +-
.../WebAssembly/simd-illegal-bitmask.ll | 89 +++++++++----------
2 files changed, 41 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47de46a6f7070..9e2ebe9a47761 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3360,7 +3360,7 @@ static SDValue performBitcastCombine(SDNode *N,
for (SDValue V : VectorsToShuffle) {
ReturningInteger = DAG.getNode(
ISD::SHL, DL, ReturnType,
- {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
+ {ReturningInteger, DAG.getShiftAmountConstant(16, ReturnType, DL)});
SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
ReturningInteger =
diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
index e497edc08c4eb..314b3247ee06b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
@@ -22,18 +22,16 @@ define i32 @optimize_illegal_bitcast_v32i8(<32 x i8> %x) {
; CHECK-LABEL: optimize_illegal_bitcast_v32i8:
; CHECK: .functype optimize_illegal_bitcast_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const $push2=, 16
-; CHECK-NEXT: v128.const $push10=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
-; CHECK-NEXT: local.tee $push9=, $2=, $pop10
-; CHECK-NEXT: i8x16.eq $push0=, $0, $pop9
+; CHECK-NEXT: v128.const $push8=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+; CHECK-NEXT: local.tee $push7=, $2=, $pop8
+; CHECK-NEXT: i8x16.eq $push0=, $0, $pop7
; CHECK-NEXT: i8x16.bitmask $push1=, $pop0
-; CHECK-NEXT: i32.const $push8=, 16
-; CHECK-NEXT: i32.add $push3=, $pop1, $pop8
-; CHECK-NEXT: i32.shl $push4=, $pop2, $pop3
-; CHECK-NEXT: i8x16.eq $push5=, $1, $2
-; CHECK-NEXT: i8x16.bitmask $push6=, $pop5
-; CHECK-NEXT: i32.add $push7=, $pop4, $pop6
-; CHECK-NEXT: return $pop7
+; CHECK-NEXT: i32.const $push2=, 16
+; CHECK-NEXT: i32.shl $push3=, $pop1, $pop2
+; CHECK-NEXT: i8x16.eq $push4=, $1, $2
+; CHECK-NEXT: i8x16.bitmask $push5=, $pop4
+; CHECK-NEXT: i32.or $push6=, $pop3, $pop5
+; CHECK-NEXT: return $pop6
%z = icmp eq <32 x i8> %x, splat (i8 32)
%res = bitcast <32 x i1> %z to i32
ret i32 %res
@@ -44,18 +42,16 @@ define i32 @optimize_illegal_bitcast_v32i8_const_step_vec(<32 x i8> %x) {
; CHECK-LABEL: optimize_illegal_bitcast_v32i8_const_step_vec:
; CHECK: .functype optimize_illegal_bitcast_v32i8_const_step_vec (v128, v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const $push3=, 16
; CHECK-NEXT: v128.const $push0=, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
; CHECK-NEXT: i8x16.bitmask $push2=, $pop1
-; CHECK-NEXT: i32.const $push10=, 16
-; CHECK-NEXT: i32.add $push4=, $pop2, $pop10
-; CHECK-NEXT: i32.shl $push5=, $pop3, $pop4
-; CHECK-NEXT: v128.const $push6=, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
-; CHECK-NEXT: i8x16.eq $push7=, $1, $pop6
-; CHECK-NEXT: i8x16.bitmask $push8=, $pop7
-; CHECK-NEXT: i32.add $push9=, $pop5, $pop8
-; CHECK-NEXT: return $pop9
+; CHECK-NEXT: i32.const $push3=, 16
+; CHECK-NEXT: i32.shl $push4=, $pop2, $pop3
+; CHECK-NEXT: v128.const $push5=, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
+; CHECK-NEXT: i8x16.eq $push6=, $1, $pop5
+; CHECK-NEXT: i8x16.bitmask $push7=, $pop6
+; CHECK-NEXT: i32.or $push8=, $pop4, $pop7
+; CHECK-NEXT: return $pop8
%const_step_vec = add <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8,
i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16,
i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24,
@@ -70,16 +66,14 @@ define i32 @optimize_illegal_bitcast_v32i8_non_const_vec(<32 x i8> %x, <32 x i8>
; CHECK-LABEL: optimize_illegal_bitcast_v32i8_non_const_vec:
; CHECK: .functype optimize_illegal_bitcast_v32i8_non_const_vec (v128, v128, v128, v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const $push2=, 16
; CHECK-NEXT: i8x16.eq $push0=, $0, $2
; CHECK-NEXT: i8x16.bitmask $push1=, $pop0
-; CHECK-NEXT: i32.const $push8=, 16
-; CHECK-NEXT: i32.add $push3=, $pop1, $pop8
-; CHECK-NEXT: i32.shl $push4=, $pop2, $pop3
-; CHECK-NEXT: i8x16.eq $push5=, $1, $3
-; CHECK-NEXT: i8x16.bitmask $push6=, $pop5
-; CHECK-NEXT: i32.add $push7=, $pop4, $pop6
-; CHECK-NEXT: return $pop7
+; CHECK-NEXT: i32.const $push2=, 16
+; CHECK-NEXT: i32.shl $push3=, $pop1, $pop2
+; CHECK-NEXT: i8x16.eq $push4=, $1, $3
+; CHECK-NEXT: i8x16.bitmask $push5=, $pop4
+; CHECK-NEXT: i32.or $push6=, $pop3, $pop5
+; CHECK-NEXT: return $pop6
%z = icmp eq <32 x i8> %x, %y
%res = bitcast <32 x i1> %z to i32
ret i32 %res
@@ -92,31 +86,28 @@ define i64 @optimize_illegal_bitcast_v64i8(<64 x i8> %x) {
; CHECK-LABEL: optimize_illegal_bitcast_v64i8:
; CHECK: .functype optimize_illegal_bitcast_v64i8 (v128, v128, v128, v128) -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i64.const $push3=, 16
-; CHECK-NEXT: i64.const $push24=, 16
-; CHECK-NEXT: i64.const $push23=, 16
-; CHECK-NEXT: v128.const $push22=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
-; CHECK-NEXT: local.tee $push21=, $4=, $pop22
-; CHECK-NEXT: i8x16.eq $push0=, $0, $pop21
+; CHECK-NEXT: v128.const $push21=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+; CHECK-NEXT: local.tee $push20=, $4=, $pop21
+; CHECK-NEXT: i8x16.eq $push0=, $0, $pop20
; CHECK-NEXT: i8x16.bitmask $push1=, $pop0
-; CHECK-NEXT: i64.extend_i32_u $push2=, $pop1
-; CHECK-NEXT: i64.const $push20=, 16
-; CHECK-NEXT: i64.add $push4=, $pop2, $pop20
-; CHECK-NEXT: i64.shl $push5=, $pop23, $pop4
-; CHECK-NEXT: i8x16.eq $push6=, $1, $4
-; CHECK-NEXT: i8x16.bitmask $push7=, $pop6
-; CHECK-NEXT: i64.extend_i32_u $push8=, $pop7
-; CHECK-NEXT: i64.add $push9=, $pop5, $pop8
-; CHECK-NEXT: i64.shl $push10=, $pop24, $pop9
-; CHECK-NEXT: i8x16.eq $push11=, $2, $4
-; CHECK-NEXT: i8x16.bitmask $push12=, $pop11
-; CHECK-NEXT: i64.extend_i32_u $push13=, $pop12
-; CHECK-NEXT: i64.add $push14=, $pop10, $pop13
-; CHECK-NEXT: i64.shl $push15=, $pop3, $pop14
+; CHECK-NEXT: i32.const $push2=, 16
+; CHECK-NEXT: i32.shl $push3=, $pop1, $pop2
+; CHECK-NEXT: i8x16.eq $push4=, $1, $4
+; CHECK-NEXT: i8x16.bitmask $push5=, $pop4
+; CHECK-NEXT: i32.or $push6=, $pop3, $pop5
+; CHECK-NEXT: i64.extend_i32_u $push7=, $pop6
+; CHECK-NEXT: i64.const $push8=, 32
+; CHECK-NEXT: i64.shl $push9=, $pop7, $pop8
+; CHECK-NEXT: i8x16.eq $push10=, $2, $4
+; CHECK-NEXT: i8x16.bitmask $push11=, $pop10
+; CHECK-NEXT: i64.extend_i32_u $push12=, $pop11
+; CHECK-NEXT: i64.const $push13=, 16
+; CHECK-NEXT: i64.shl $push14=, $pop12, $pop13
+; CHECK-NEXT: i64.or $push15=, $pop9, $pop14
; CHECK-NEXT: i8x16.eq $push16=, $3, $4
; CHECK-NEXT: i8x16.bitmask $push17=, $pop16
; CHECK-NEXT: i64.extend_i32_u $push18=, $pop17
-; CHECK-NEXT: i64.add $push19=, $pop15, $pop18
+; CHECK-NEXT: i64.or $push19=, $pop15, $pop18
; CHECK-NEXT: return $pop19
%z = icmp eq <64 x i8> %x, splat (i8 64)
%res = bitcast <64 x i1> %z to i64
More information about the llvm-commits
mailing list