[llvm] [WebAssembly] Fix missed optimization in 50142 (PR #144741)

Wed Jun 18 13:47:29 PDT 2025

https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/144741

>From 48595b3232344d7831748b134b9dc16498c36ba4 Mon Sep 17 00:00:00 2001
From: badumbatish <tanghocle456 at gmail.com>
Date: Wed, 18 Jun 2025 09:01:16 -0700
Subject: [PATCH] Fix 50142

Fix a missed vectorization opportunity from 50142, where the best we
could achieve was zext (xor (any_true), -1). Now the issue50142 test
case is converted to all_true.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 32 +++++++++++++++++++
 .../WebAssembly/simd-setcc-reductions.ll      | 22 +++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3cd923c0ba058..be1daad3baf7f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "WebAssemblyTargetMachine.h"
 #include "WebAssemblyUtilities.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -3248,6 +3249,37 @@ static SDValue performSETCCCombine(SDNode *N,
   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
+  //  N           LHS     LhsL        LhsLL    LhsLR   InnerCond RHS Cond
+  // setcc (iN (bitcast (setcc vNi1 (vNiY X), <vNiY 0>, eq))),    0, eq
+  // => all_true (vNiY X)
+  if (DCI.isBeforeLegalize() && VT.isScalarInteger() && (Cond == ISD::SETEQ) &&
+      (isNullConstant(RHS)) && LHS->getOpcode() == ISD::BITCAST) {
+    SDValue LhsL = LHS.getOperand(0);
+    EVT LhsLType = LhsL.getValueType();
+    if (LhsL.getOpcode() == ISD::SETCC) {
+      ISD::CondCode InnerCond =
+          cast<CondCodeSDNode>(LhsL->getOperand(2))->get();
+      if (InnerCond == ISD::SETEQ) {
+        SDValue LhsLL = LhsL.getOperand(0); // vNiY X
+        SDValue LhsLR = LhsL.getOperand(1); // <0>
+        unsigned NumElts = LhsLType.getVectorNumElements();
+        bool Vectorizable =
+            NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16;
+        EVT Width = MVT::getIntegerVT(128 / NumElts);
+
+        if (Vectorizable && LhsLR.getOpcode() == ISD::BUILD_VECTOR &&
+            LhsLType.isFixedLengthVector()) {
+          return DAG.getZExtOrTrunc(
+              DAG.getNode(
+                  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+                  {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
+                   DAG.getSExtOrTrunc(
+                       LhsLL, DL, LhsLType.changeVectorElementType(Width))}),
+              DL, MVT::i1);
+        }
+      }
+    }
+  }
 
   // setcc (iN (bitcast (vNi1 X))), 0, ne
   //   ==> any_true (vNi1 X)
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
new file mode 100644
index 0000000000000..fa2487f58f3cf
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm64"
+
+;CHECK:all_true:
+;CHECK-NEXT:        .functype       all_true (i64) -> (i32)
+;CHECK-NEXT: v128.load       $push0=, 0($0):p2align=0
+;CHECK-NEXT:        i8x16.all_true  $push1=, $pop0
+;CHECK-NEXT:        return  $pop1
+;CHECK-NEXT:        end_function
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define hidden range(i32 0, 2) i32 @all_true(ptr noundef readonly captures(none) %a) local_unnamed_addr #0 {
+entry: ; returns 1 iff no byte of the <16 x i8> loaded from %a is zero; the CHECK lines above expect i8x16.all_true
+  %0 = load <16 x i8>, ptr %a, align 1 ; 128-bit vector load with byte alignment
+  %.fr = freeze <16 x i8> %0
+  %1 = icmp eq <16 x i8> %.fr, zeroinitializer ; per-lane "is zero" test
+  %2 = bitcast <16 x i1> %1 to i16 ; pack the 16 lane results into an i16 mask
+  %3 = icmp eq i16 %2, 0 ; mask == 0 means no lane was zero
+  %conv3 = zext i1 %3 to i32 ; widen the i1 result to the i32 return type
+  ret i32 %conv3
+}
+