[llvm] [WebAssembly] Optimizes [zext](and 63|32) for shl, srl and sra with selectShiftMask (PR #170572)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 03:06:14 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: Jasmine Tang (badumbatish)
<details>
<summary>Changes</summary>
`selectShiftMask` takes inspiration from the RISC-V backend's shift-mask selection in llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp.
---
Patch is 107.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170572.diff
5 Files Affected:
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp (+52)
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td (+10-13)
- (modified) llvm/test/CodeGen/WebAssembly/disable-feature.ll (-2)
- (modified) llvm/test/CodeGen/WebAssembly/legalize.ll (+1-3)
- (modified) llvm/test/CodeGen/WebAssembly/simd-arith.ll (+692-1016)
``````````diff
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 2541b0433ab59..047eefb2d96ea 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -70,6 +70,15 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
+ bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
+
+ bool selectShiftMask32(SDValue N, SDValue &ShAmt) {
+ return selectShiftMask(N, 32, ShAmt);
+ }
+
+ bool selectShiftMask64(SDValue N, SDValue &ShAmt) {
+ return selectShiftMask(N, 64, ShAmt);
+ }
// Include the pieces autogenerated from the target description.
#include "WebAssemblyGenDAGISel.inc"
@@ -548,6 +557,49 @@ bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset,
return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
}
+bool WebAssemblyDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
+ SDValue &ShAmt) {
+
+ ShAmt = N;
+
+ bool HasZext = false;
+ // Peek through zext.
+ if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) {
+ ShAmt = ShAmt.getOperand(0);
+ HasZext = true;
+ }
+
+ if (ShAmt.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+ const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
+
+ // Since the max shift amount is a power of 2 we can subtract 1 to make a
+ // mask that covers the bits needed to represent all shift amounts.
+ assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+ APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
+
+ if (ShMask.isSubsetOf(AndMask)) {
+ ShAmt = ShAmt.getOperand(0);
+ } else {
+ // TODO: port computeKnownBits from riscv in another PR about rotr and
+ // rotl
+ return false;
+ }
+
+ // Only reinstate the zext if it is i32 -> i64; WebAssembly would have
+ // legalized i16 to i32 in the DAG otherwise.
+ if (HasZext && ShiftWidth == 64) {
+ ShAmt = SDValue(CurDAG->getMachineNode(WebAssembly::I64_EXTEND_U_I32,
+ SDLoc(N), MVT::i64, ShAmt),
+ 0);
+ }
+ return true;
+ }
+
+ // TODO: Port rest of riscv if applicable
+ return false;
+}
+
/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
/// for instruction scheduling.
FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 991507e883f28..3dbb777187374 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -44,6 +44,10 @@ multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32>
!strconcat("i64.", name), i64Inst>;
}
+// ComplexPattern
+def shiftMask32 : ComplexPattern<i32, 1, "selectShiftMask32", [], [], 0>;
+def shiftMask64 : ComplexPattern<i64, 1, "selectShiftMask64", [], [], 0>;
+
// The spaces after the names are for aesthetic purposes only, to make
// operands line up vertically after tab expansion.
let isCommutable = 1 in
@@ -94,12 +98,12 @@ defm EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
"i64.eqz \t$dst, $src", "i64.eqz", 0x50>;
// Optimize away an explicit mask on a shift count.
-def : Pat<(shl I32:$lhs, (and I32:$rhs, 31)), (SHL_I32 I32:$lhs, I32:$rhs)>;
-def : Pat<(sra I32:$lhs, (and I32:$rhs, 31)), (SHR_S_I32 I32:$lhs, I32:$rhs)>;
-def : Pat<(srl I32:$lhs, (and I32:$rhs, 31)), (SHR_U_I32 I32:$lhs, I32:$rhs)>;
-def : Pat<(shl I64:$lhs, (and I64:$rhs, 63)), (SHL_I64 I64:$lhs, I64:$rhs)>;
-def : Pat<(sra I64:$lhs, (and I64:$rhs, 63)), (SHR_S_I64 I64:$lhs, I64:$rhs)>;
-def : Pat<(srl I64:$lhs, (and I64:$rhs, 63)), (SHR_U_I64 I64:$lhs, I64:$rhs)>;
+def : Pat<(shl I32:$lhs, (shiftMask32 I32:$rhs)), (SHL_I32 I32:$lhs, I32:$rhs)>;
+def : Pat<(sra I32:$lhs, (shiftMask32 I32:$rhs)), (SHR_S_I32 I32:$lhs, I32:$rhs)>;
+def : Pat<(srl I32:$lhs, (shiftMask32 I32:$rhs)), (SHR_U_I32 I32:$lhs, I32:$rhs)>;
+def : Pat<(shl I64:$lhs, (shiftMask64 I64:$rhs)), (SHL_I64 I64:$lhs, I64:$rhs)>;
+def : Pat<(sra I64:$lhs, (shiftMask64 I64:$rhs)), (SHR_S_I64 I64:$lhs, I64:$rhs)>;
+def : Pat<(srl I64:$lhs, (shiftMask64 I64:$rhs)), (SHR_U_I64 I64:$lhs, I64:$rhs)>;
// Optimize away an explicit mask on a rotate count.
def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
@@ -107,13 +111,6 @@ def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
-def : Pat<(shl I64:$lhs, (zext (and I32:$rhs, 63))),
- (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
-def : Pat<(sra I64:$lhs, (zext (and I32:$rhs, 63))),
- (SHR_S_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
-def : Pat<(srl I64:$lhs, (zext (and I32:$rhs, 63))),
- (SHR_U_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>;
-
defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
(outs), (ins),
[(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
diff --git a/llvm/test/CodeGen/WebAssembly/disable-feature.ll b/llvm/test/CodeGen/WebAssembly/disable-feature.ll
index 5f7275f3699ed..93f69c912f673 100644
--- a/llvm/test/CodeGen/WebAssembly/disable-feature.ll
+++ b/llvm/test/CodeGen/WebAssembly/disable-feature.ll
@@ -13,8 +13,6 @@ define i8 @not_use_extend8_s(i8 %v, i8 %x) {
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.shr_s
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32.const 255
-; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.shr_s
; CHECK-NEXT: # fallthrough-return
%a = ashr i8 %v, %x
diff --git a/llvm/test/CodeGen/WebAssembly/legalize.ll b/llvm/test/CodeGen/WebAssembly/legalize.ll
index 8710a0598d0d8..55c9f4cff5cc4 100644
--- a/llvm/test/CodeGen/WebAssembly/legalize.ll
+++ b/llvm/test/CodeGen/WebAssembly/legalize.ll
@@ -14,9 +14,7 @@ define i3 @shl_i3(i3 %a, i3 %b, ptr %p) {
}
; CHECK-LABEL: shl_i53:
-; CHECK: i64.const $push0=, 9007199254740991{{$}}
-; CHECK: i64.and $push1=, $1, $pop0{{$}}
-; CHECK: i64.shl $push2=, $0, $pop1{{$}}
+; CHECK: i64.shl $push0=, $0, $1
define i53 @shl_i53(i53 %a, i53 %b, ptr %p) {
%t = shl i53 %a, %b
ret i53 %t
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 60b4a837f7c31..acd383afb8283 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -3053,81 +3053,75 @@ define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-LABEL: shl_v16i8:
; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push18=, $17, $pop0
-; NO-SIMD128-NEXT: local.tee $push17=, $17=, $pop18
-; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop17
-; NO-SIMD128-NEXT: i32.store8 15($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $15, $17
-; NO-SIMD128-NEXT: i32.store8 14($0), $pop2
-; NO-SIMD128-NEXT: i32.shl $push3=, $14, $17
-; NO-SIMD128-NEXT: i32.store8 13($0), $pop3
-; NO-SIMD128-NEXT: i32.shl $push4=, $13, $17
-; NO-SIMD128-NEXT: i32.store8 12($0), $pop4
-; NO-SIMD128-NEXT: i32.shl $push5=, $12, $17
-; NO-SIMD128-NEXT: i32.store8 11($0), $pop5
-; NO-SIMD128-NEXT: i32.shl $push6=, $11, $17
-; NO-SIMD128-NEXT: i32.store8 10($0), $pop6
-; NO-SIMD128-NEXT: i32.shl $push7=, $10, $17
-; NO-SIMD128-NEXT: i32.store8 9($0), $pop7
-; NO-SIMD128-NEXT: i32.shl $push8=, $9, $17
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop8
-; NO-SIMD128-NEXT: i32.shl $push9=, $8, $17
-; NO-SIMD128-NEXT: i32.store8 7($0), $pop9
-; NO-SIMD128-NEXT: i32.shl $push10=, $7, $17
-; NO-SIMD128-NEXT: i32.store8 6($0), $pop10
-; NO-SIMD128-NEXT: i32.shl $push11=, $6, $17
-; NO-SIMD128-NEXT: i32.store8 5($0), $pop11
-; NO-SIMD128-NEXT: i32.shl $push12=, $5, $17
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop12
-; NO-SIMD128-NEXT: i32.shl $push13=, $4, $17
-; NO-SIMD128-NEXT: i32.store8 3($0), $pop13
-; NO-SIMD128-NEXT: i32.shl $push14=, $3, $17
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop14
-; NO-SIMD128-NEXT: i32.shl $push15=, $2, $17
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop15
-; NO-SIMD128-NEXT: i32.shl $push16=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop16
+; NO-SIMD128-NEXT: i32.shl $push0=, $16, $17
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $15, $17
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $14, $17
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $13, $17
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.shl $push4=, $12, $17
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.shl $push5=, $11, $17
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.shl $push6=, $10, $17
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.shl $push7=, $9, $17
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.shl $push8=, $8, $17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.shl $push9=, $7, $17
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.shl $push10=, $6, $17
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.shl $push11=, $5, $17
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.shl $push12=, $4, $17
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.shl $push13=, $3, $17
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.shl $push14=, $2, $17
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.shl $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_v16i8:
; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $17, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push17=, $17=, $pop18
-; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: i32.shl $push0=, $1, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0
+; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $17
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10
-; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14
-; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $5, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $6, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $7, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $8, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $9, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $10, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $11, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $12, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $13, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $14, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $15, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $16, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <16 x i8> undef, i8 %x, i32 0
%s = shufflevector <16 x i8> %t, <16 x i8> undef,
@@ -3469,139 +3463,75 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128-LABEL: shl_vec_v16i8:
; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0
-; NO-SIMD128-NEXT: i32.shl $push2=, $16, $pop1
-; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push47=, 255
-; NO-SIMD128-NEXT: i32.and $push3=, $31, $pop47
-; NO-SIMD128-NEXT: i32.shl $push4=, $15, $pop3
-; NO-SIMD128-NEXT: i32.store8 14($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push46=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $30, $pop46
-; NO-SIMD128-NEXT: i32.shl $push6=, $14, $pop5
-; NO-SIMD128-NEXT: i32.store8 13($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push45=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $29, $pop45
-; NO-SIMD128-NEXT: i32.shl $push8=, $13, $pop7
-; NO-SIMD128-NEXT: i32.store8 12($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push44=, 255
-; NO-SIMD128-NEXT: i32.and $push9=, $28, $pop44
-; NO-SIMD128-NEXT: i32.shl $push10=, $12, $pop9
-; NO-SIMD128-NEXT: i32.store8 11($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push43=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $27, $pop43
-; NO-SIMD128-NEXT: i32.shl $push12=, $11, $pop11
-; NO-SIMD128-NEXT: i32.store8 10($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push42=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $26, $pop42
-; NO-SIMD128-NEXT: i32.shl $push14=, $10, $pop13
-; NO-SIMD128-NEXT: i32.store8 9($0), $pop14
-; NO-SIMD128-NEXT: i32.const $push41=, 255
-; NO-SIMD128-NEXT: i32.and $push15=, $25, $pop41
-; NO-SIMD128-NEXT: i32.shl $push16=, $9, $pop15
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 255
-; NO-SIMD128-NEXT: i32.and $push17=, $24, $pop40
-; NO-SIMD128-NEXT: i32.shl $push18=, $8, $pop17
-; NO-SIMD128-NEXT: i32.store8 7($0), $pop18
-; NO-SIMD128-NEXT: i32.const $push39=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $23, $pop39
-; NO-SIMD128-NEXT: i32.shl $push20=, $7, $pop19
-; NO-SIMD128-NEXT: i32.store8 6($0), $pop20
-; NO-SIMD128-NEXT: i32.const $push38=, 255
-; NO-SIMD128-NEXT: i32.and $push21=, $22, $pop38
-; NO-SIMD128-NEXT: i32.shl $push22=, $6, $pop21
-; NO-SIMD128-NEXT: i32.store8 5($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push37=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $21, $pop37
-; NO-SIMD128-NEXT: i32.shl $push24=, $5, $pop23
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop24
-; NO-SIMD128-NEXT: i32.const $push36=, 255
-; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop36
-; NO-SIMD128-NEXT: i32.shl $push26=, $4, $pop25
-; NO-SIMD128-NEXT: i32.store8 3($0), $pop26
-; NO-SIMD128-NEXT: i32.const $push35=, 255
-; NO-SIMD128-NEXT: i32.and $push27=, $19, $pop35
-; NO-SIMD128-NEXT: i32.shl $push28=, $3, $pop27
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop28
-; NO-SIMD128-NEXT: i32.const $push34=, 255
-; NO-SIMD128-NEXT: i32.and $push29=, $18, $pop34
-; NO-SIMD128-NEXT: i32.shl $push30=, $2, $pop29
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop30
-; NO-SIMD128-NEXT: i32.const $push33=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop33
-; NO-SIMD128-NEXT: i32.shl $push32=, $1, $pop31
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop32
+; NO-SIMD128-NEXT: i32.shl $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.shl $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.shl $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.shl $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.shl $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.shl $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.shl $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.shl $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.shl $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.shl $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.shl $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.shl $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.shl $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_vec_v16i8:
; NO-SIMD128-FAST: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop47
-; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $po...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/170572
More information about the llvm-commits mailing list