[llvm] [WebAssembly] Optimize away mask of 63 for shl (zext (and i32 63)) (PR #152397)

Mon Dec 1 03:03:06 PST 2025

https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/152397

>From b707a7dffc37f9ee1c79377c2234d73969a9523e Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Wed, 6 Aug 2025 15:06:18 -0700
Subject: [PATCH 1/4] Precommit test

---
 llvm/test/CodeGen/WebAssembly/masked-shifts.ll | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
index 5bcb023e546b5..47b3b5c9fdc0f 100644
--- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
+++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
@@ -18,6 +18,23 @@ define i32 @shl_i32(i32 %v, i32 %x) {
   ret i32 %a
 }
 
+define i64 @shl_i64_i32(i64 %v, i32 %x) {
+; CHECK-LABEL: shl_i64_i32:
+; CHECK:         .functype shl_i64_i32 (i64, i32) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 63
+; CHECK-NEXT:    i32.and
+; CHECK-NEXT:    i64.extend_i32_u
+; CHECK-NEXT:    i64.shl
+; CHECK-NEXT:    # fallthrough-return
+  %m = and i32 %x, 63
+  %z = zext i32 %m to i64
+  %a = shl i64 %v, %z
+  ret i64 %a
+}
+
 define i32 @sra_i32(i32 %v, i32 %x) {
 ; CHECK-LABEL: sra_i32:
 ; CHECK:         .functype sra_i32 (i32, i32) -> (i32)

>From 40e9092ace4dec089eaea6c65d480c9bd79eafcd Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Thu, 7 Aug 2025 10:38:23 -0700
Subject: [PATCH 2/4] Optimize away mask of 63 for shl (zext (and i32 63))

---
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   | 35 +++++++++++++++++++
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  1 -
 .../WebAssembly/WebAssemblyInstrInfo.td       |  2 +-
 .../WebAssembly/WebAssemblyInstrInteger.td    |  7 ++++
 .../test/CodeGen/WebAssembly/masked-shifts.ll |  2 --
 5 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index b03b35028c69c..202c770bf8e8f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -70,6 +70,11 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
   bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
   bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
 
+  bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
+
+  bool selectShiftMask64FromI32(SDValue N, SDValue &ShAmt) {
+    return selectShiftMask(N, 64, ShAmt);
+  }
 // Include the pieces autogenerated from the target description.
 #include "WebAssemblyGenDAGISel.inc"
 
@@ -539,6 +544,36 @@ bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset,
   return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
 }
 
+bool WebAssemblyDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
+                                              SDValue &ShAmt) {
+
+  ShAmt = N;
+
+  if (ShAmt.getOpcode() == ISD::AND &&
+      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
+
+    // Since the max shift amount is a power of 2 we can subtract 1 to make a
+    // mask that covers the bits needed to represent all shift amounts.
+    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
+
+    if (ShMask.isSubsetOf(AndMask)) {
+      ShAmt = ShAmt.getOperand(0);
+    } else {
+      // SimplifyDemandedBits may have optimized the mask so try restoring any
+      // bits that are known zero.
+      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
+      if (ShMask.isSubsetOf(AndMask | Known.Zero))
+        ShAmt = ShAmt.getOperand(0);
+    }
+    return true;
+  }
+
+  // TODO: Port rest of riscv if applicable
+  return false;
+}
+
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
 /// for instruction scheduling.
 FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3f80b2ab2bd6d..7df4e2590c212 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3343,7 +3343,6 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
       Ret = DAG.getNOT(DL, Ret, MVT::i1);
     return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
   };
-
   if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
                                        Intrinsic::wasm_alltrue))
     return AnyTrueEQ;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 13d048a98d6ea..ce4db2e112fa0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -460,8 +460,8 @@ def : Pat<(i64 (WebAssemblyWrapperREL texternalsym:$addr)),
 include "WebAssemblyInstrMemory.td"
 include "WebAssemblyInstrCall.td"
 include "WebAssemblyInstrControl.td"
-include "WebAssemblyInstrInteger.td"
 include "WebAssemblyInstrConv.td"
+include "WebAssemblyInstrInteger.td"
 include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrAtomics.td"
 include "WebAssemblyInstrSIMD.td"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index d4c8f92c883e7..8505608d42c4c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -44,6 +44,10 @@ multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32>
                 !strconcat("i64.", name), i64Inst>;
 }
 
+// ComplexPattern
+def shiftMask64FromI32
+    : ComplexPattern<i32, 1, "selectShiftMask64FromI32", [], [], 0>;
+
 // The spaces after the names are for aesthetic purposes only, to make
 // operands line up vertically after tab expansion.
 let isCommutable = 1 in
@@ -101,6 +105,9 @@ def : Pat<(shl I64:$lhs, (and I64:$rhs, 63)), (SHL_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(sra I64:$lhs, (and I64:$rhs, 63)), (SHR_S_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(srl I64:$lhs, (and I64:$rhs, 63)), (SHR_U_I64 I64:$lhs, I64:$rhs)>;
 
+def : Pat<(shl I64:$lhs, (zext(shiftMask64FromI32 I32:$rhs))),
+          (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
+
 // Optimize away an explicit mask on a rotate count.
 def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
 def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
index 47b3b5c9fdc0f..45c79df5f3f2b 100644
--- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
+++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
@@ -24,8 +24,6 @@ define i64 @shl_i64_i32(i64 %v, i32 %x) {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 63
-; CHECK-NEXT:    i32.and
 ; CHECK-NEXT:    i64.extend_i32_u
 ; CHECK-NEXT:    i64.shl
 ; CHECK-NEXT:    # fallthrough-return

>From 8f1cd06a0c9ffc7157f4d7d62d2085fd5ff7e2f2 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Thu, 27 Nov 2025 16:18:14 -0800
Subject: [PATCH 3/4] Simplify "Optimize away mask of 63 for shl (zext (and
 i32 63))"

This reverts commit 40e9092ace4dec089eaea6c65d480c9bd79eafcd and replaces
it with a simple pattern that matches a constant mask of 63.
---
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   | 35 -------------------
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  1 +
 .../WebAssembly/WebAssemblyInstrInteger.td    | 10 ++----
 3 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 202c770bf8e8f..b03b35028c69c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -70,11 +70,6 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
   bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
   bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
 
-  bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
-
-  bool selectShiftMask64FromI32(SDValue N, SDValue &ShAmt) {
-    return selectShiftMask(N, 64, ShAmt);
-  }
 // Include the pieces autogenerated from the target description.
 #include "WebAssemblyGenDAGISel.inc"
 
@@ -544,36 +539,6 @@ bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset,
   return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
 }
 
-bool WebAssemblyDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
-                                              SDValue &ShAmt) {
-
-  ShAmt = N;
-
-  if (ShAmt.getOpcode() == ISD::AND &&
-      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
-    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
-
-    // Since the max shift amount is a power of 2 we can subtract 1 to make a
-    // mask that covers the bits needed to represent all shift amounts.
-    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
-    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
-
-    if (ShMask.isSubsetOf(AndMask)) {
-      ShAmt = ShAmt.getOperand(0);
-    } else {
-      // SimplifyDemandedBits may have optimized the mask so try restoring any
-      // bits that are known zero.
-      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
-      if (ShMask.isSubsetOf(AndMask | Known.Zero))
-        ShAmt = ShAmt.getOperand(0);
-    }
-    return true;
-  }
-
-  // TODO: Port rest of riscv if applicable
-  return false;
-}
-
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
 /// for instruction scheduling.
 FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7df4e2590c212..3f80b2ab2bd6d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3343,6 +3343,7 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
       Ret = DAG.getNOT(DL, Ret, MVT::i1);
     return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
   };
+
   if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
                                        Intrinsic::wasm_alltrue))
     return AnyTrueEQ;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 8505608d42c4c..eb692679f5971 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -44,10 +44,6 @@ multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32>
                 !strconcat("i64.", name), i64Inst>;
 }
 
-// ComplexPattern
-def shiftMask64FromI32
-    : ComplexPattern<i32, 1, "selectShiftMask64FromI32", [], [], 0>;
-
 // The spaces after the names are for aesthetic purposes only, to make
 // operands line up vertically after tab expansion.
 let isCommutable = 1 in
@@ -105,15 +101,15 @@ def : Pat<(shl I64:$lhs, (and I64:$rhs, 63)), (SHL_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(sra I64:$lhs, (and I64:$rhs, 63)), (SHR_S_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(srl I64:$lhs, (and I64:$rhs, 63)), (SHR_U_I64 I64:$lhs, I64:$rhs)>;
 
-def : Pat<(shl I64:$lhs, (zext(shiftMask64FromI32 I32:$rhs))),
-          (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
-
 // Optimize away an explicit mask on a rotate count.
 def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
 def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
 def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
 
+def : Pat<(shl I64:$lhs, (zext (and I32:$rhs, 63))),
+                               (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
+
 defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
                     (outs), (ins),
                     [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],

>From cc4b02f07b16192a2b55db07a6884d4c21b19967 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Mon, 1 Dec 2025 03:01:47 -0800
Subject: [PATCH 4/4] Rename test case

---
 llvm/test/CodeGen/WebAssembly/masked-shifts.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
index 45c79df5f3f2b..368f30fd5d7ed 100644
--- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
+++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
@@ -18,9 +18,9 @@ define i32 @shl_i32(i32 %v, i32 %x) {
   ret i32 %a
 }
 
-define i64 @shl_i64_i32(i64 %v, i32 %x) {
-; CHECK-LABEL: shl_i64_i32:
-; CHECK:         .functype shl_i64_i32 (i64, i32) -> (i64)
+define i64 @shl_i64_zext(i64 %v, i32 %x) {
+; CHECK-LABEL: shl_i64_zext:
+; CHECK:         .functype shl_i64_zext (i64, i32) -> (i64)
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1