[llvm] Fix 50142 (PR #144741)

Wed Jun 18 09:08:36 PDT 2025

https://github.com/badumbatish created https://github.com/llvm/llvm-project/pull/144741

Fix the missed vectorization opportunity from #50142, where we could only achieve zext (xor (any_true), -1).

With this change, the pattern in the issue50142 test case is converted to all_true.


>From 6c6f12337092ac60c1b48b5b7e6311bb8af124aa Mon Sep 17 00:00:00 2001
From: badumbatish <tanghocle456 at gmail.com>
Date: Wed, 18 Jun 2025 09:01:16 -0700
Subject: [PATCH] Fix 50142

Fix the missed vectorization opportunity from 50142, where we could
only achieve zext (xor (any_true), -1). With this change, the pattern
in the issue50142 test case is converted to all_true.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 31 +++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/issue50142.ll   | 20 ++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/issue50142.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3cd923c0ba058..84a18f74867a5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "WebAssemblyTargetMachine.h"
 #include "WebAssemblyUtilities.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -36,6 +37,7 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
+#include <iostream>
 using namespace llvm;
 
 #define DEBUG_TYPE "wasm-lower"
@@ -3248,6 +3250,35 @@ static SDValue performSETCCCombine(SDNode *N,
   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
+  //  N           LHS     LhsL        LhsLL    LhsLR   InnerCond RHS Cond
+  // setcc (iN (bitcast (setcc vNi1 (vNiY X), <vNiY 0>, eq)),     0, eq
+  // => all_true (vNi1 X)
+  if (DCI.isBeforeLegalize() && VT.isScalarInteger() && (Cond == ISD::SETEQ) &&
+      (isNullConstant(RHS)) && LHS->getOpcode() == ISD::BITCAST) {
+    SDValue LhsL = LHS.getOperand(0);
+    EVT LhsLType = LhsL.getValueType();
+    ISD::CondCode InnerCond = cast<CondCodeSDNode>(LhsL->getOperand(2))->get();
+    if (LhsL.getOpcode() == ISD::SETCC && InnerCond == ISD::SETEQ) {
+      SDValue LhsLL = LhsL.getOperand(0); // vNi1 X
+      SDValue LhsLR = LhsL.getOperand(1); // 0
+      unsigned NumElts = LhsLType.getVectorNumElements();
+      bool Vectorizable =
+          NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16;
+      EVT Width = MVT::getIntegerVT(128 / NumElts);
+      // EVT LhsLLType = LhsLL.getValueType();
+
+      if (Vectorizable && LhsLR.getOpcode() == ISD::BUILD_VECTOR &&
+          LhsLType.isFixedLengthVector()) {
+        return DAG.getZExtOrTrunc(
+            DAG.getNode(
+                ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+                {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
+                 DAG.getSExtOrTrunc(LhsLL, DL,
+                                    LhsLType.changeVectorElementType(Width))}),
+            DL, MVT::i1);
+      }
+    }
+  }
 
   // setcc (iN (bitcast (vNi1 X))), 0, ne
   //   ==> any_true (vNi1 X)
diff --git a/llvm/test/CodeGen/WebAssembly/issue50142.ll b/llvm/test/CodeGen/WebAssembly/issue50142.ll
new file mode 100644
index 0000000000000..24ba941e76ee2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/issue50142.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=wasm32-- -mattr=+simd128 | FileCheck --check-prefix=CHECK %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define hidden range(i32 0, 2) i32 @all_true(ptr noundef readonly captures(none) %a) local_unnamed_addr #0 {
+; CHECK-LABEL: all_true:
+; CHECK:         .functype all_true (i32) -> (i32)
+; CHECK: local.get 0
+; CHECK-NEXT: v128.load 0:p2align=0
+; CHECK-NEXT: i8x16.all_true
+; CHECK-NEXT:    # fallthrough-return
+; CHECK-NEXT: end_function 
+entry:
+  %0 = load <16 x i8>, ptr %a, align 1
+  %.fr = freeze <16 x i8> %0
+  %1 = icmp eq <16 x i8> %.fr, zeroinitializer
+  %2 = bitcast <16 x i1> %1 to i16
+  %3 = icmp eq i16 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}