[llvm] f2f5845 - [WebAssembly][FastISel] Fold AND mask operations into ZExt load (#183743)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 19:30:53 PDT 2026
Author: hanbeom
Date: 2026-03-11T11:30:48+09:00
New Revision: f2f5845f195aa5bb25d28702daadc628cfdeabea
URL: https://github.com/llvm/llvm-project/commit/f2f5845f195aa5bb25d28702daadc628cfdeabea
DIFF: https://github.com/llvm/llvm-project/commit/f2f5845f195aa5bb25d28702daadc628cfdeabea.diff
LOG: [WebAssembly][FastISel] Fold AND mask operations into ZExt load (#183743)
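FastISel emits a load followed by a separate AND with a constant mask
when a loaded value is zero-extended.
(before) %1:i32 = LOAD8_U_I32 %addr; %2:i32 = CONST_I32 255; %3:i32 = AND_I32 %1, %2
Fold the AND into a zero-extending load when one of its operands is a
constant equal to maskTrailingOnes(<bit width of the loaded type>). A
getFoldedLoadOpcode wrapper dispatches between this new fold and the
existing sign-extend fold, so that further folds are easy to add.
(after) %1:i32 = LOAD8_U_I32 %addr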
Fixed: https://github.com/llvm/llvm-project/issues/180783
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
llvm/test/CodeGen/WebAssembly/load-ext.ll
llvm/test/CodeGen/WebAssembly/offset-fastisel.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index fc73785d9b44b..daa95d21d9531 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1309,12 +1309,73 @@ static unsigned getSExtLoadOpcode(unsigned Opc, bool A64) {
return Opc;
}
+static unsigned getZExtLoadOpcodeFromAnd(MachineInstr *MI, // Zext-load opcode for a masking AND, else INSTRUCTION_LIST_END.
+ MachineRegisterInfo &MRI, // MRI resolves the defs of MI's inputs.
+ const LoadInst *LI, bool A64) { // A64 picks the _A64 opcode variants.
+ uint64_t Mask = 0; // Immediate of the AND's constant input, once found.
+ bool IsConstant = false;
+ for (unsigned I = 1; I <= 2; ++I) { // Scan both register inputs of the AND.
+ Register Reg = MI->getOperand(I).getReg();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
+ if (DefMI && (DefMI->getOpcode() == WebAssembly::CONST_I32 ||
+ DefMI->getOpcode() == WebAssembly::CONST_I64)) {
+ Mask = DefMI->getOperand(1).getImm(); // Operand 1 of the CONST is read as its value.
+ IsConstant = true;
+ break; // The first constant input found is used as the mask.
+ }
+ }
+ // NOTE(review): the non-constant input is never checked to be the load's value; confirm the caller guarantees it.
+ if (!IsConstant) // No constant input, so there is no known mask to match.
+ return WebAssembly::INSTRUCTION_LIST_END;
+ // Fold only when the mask keeps exactly the low LoadSize bits.
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ if (Mask != llvm::maskTrailingOnes<uint64_t>(LoadSize)) // NOTE(review): presumably LoadSize <= 64 here; confirm.
+ return WebAssembly::INSTRUCTION_LIST_END;
+ // Pick the zero-extending load by AND width, load width and address size.
+ if (MI->getOpcode() == WebAssembly::AND_I32) { // 32-bit AND: only 8- and 16-bit loads fold.
+ if (LoadSize == 8)
+ return A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
+ if (LoadSize == 16)
+ return A64 ? WebAssembly::LOAD16_U_I32_A64
+ : WebAssembly::LOAD16_U_I32_A32;
+ } else if (MI->getOpcode() == WebAssembly::AND_I64) { // 64-bit AND: 8-, 16- and 32-bit loads fold.
+ if (LoadSize == 8)
+ return A64 ? WebAssembly::LOAD8_U_I64_A64 : WebAssembly::LOAD8_U_I64_A32;
+ if (LoadSize == 16)
+ return A64 ? WebAssembly::LOAD16_U_I64_A64
+ : WebAssembly::LOAD16_U_I64_A32;
+ if (LoadSize == 32)
+ return A64 ? WebAssembly::LOAD32_U_I64_A64
+ : WebAssembly::LOAD32_U_I64_A32;
+ }
+ // No zero-extending load matches this AND width / load width pair.
+ return WebAssembly::INSTRUCTION_LIST_END;
+}
+// Picks the folded-load opcode for MI, or INSTRUCTION_LIST_END if none.
+static unsigned getFoldedLoadOpcode(MachineInstr *MI, MachineRegisterInfo &MRI,
+ const LoadInst *LI, bool A64) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::I32_EXTEND8_S_I32:
+ case WebAssembly::I32_EXTEND16_S_I32:
+ case WebAssembly::I64_EXTEND8_S_I64:
+ case WebAssembly::I64_EXTEND16_S_I64:
+ case WebAssembly::I64_EXTEND32_S_I64:
+ case WebAssembly::I64_EXTEND_S_I32:
+ return getSExtLoadOpcode(MI->getOpcode(), A64); // Sign-extend ops are mapped from the opcode alone.
+ case WebAssembly::AND_I32:
+ case WebAssembly::AND_I64:
+ return getZExtLoadOpcodeFromAnd(MI, MRI, LI, A64); // ANDs also need the mask and the load type inspected.
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END; // Any other opcode is not foldable here.
+ }
+}
+
bool WebAssemblyFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
bool A64 = Subtarget->hasAddr64();
- unsigned NewOpc;
- if ((NewOpc = getSExtLoadOpcode(MI->getOpcode(), A64)) ==
- WebAssembly::INSTRUCTION_LIST_END)
+ MachineRegisterInfo &MRI = FuncInfo.MF->getRegInfo();
+ unsigned NewOpc = getFoldedLoadOpcode(MI, MRI, LI, A64);
+ if (NewOpc == WebAssembly::INSTRUCTION_LIST_END)
return false;
Register ResultReg = MI->getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/WebAssembly/load-ext.ll b/llvm/test/CodeGen/WebAssembly/load-ext.ll
index 73a0b20fd9e29..b4adeee3a12e3 100644
--- a/llvm/test/CodeGen/WebAssembly/load-ext.ll
+++ b/llvm/test/CodeGen/WebAssembly/load-ext.ll
@@ -378,18 +378,14 @@ define i32 @zext_i8_i32(ptr %p) {
; WASM32-FAST-LABEL: zext_i8_i32:
; WASM32-FAST: .functype zext_i8_i32 (i32) -> (i32)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load8_u $push2=, 0($0)
-; WASM32-FAST-NEXT: i32.const $push0=, 255
-; WASM32-FAST-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i32.load8_u $push0=, 0($0)
+; WASM32-FAST-NEXT: return $pop0
;
; WASM32-FAST-MVP-LABEL: zext_i8_i32:
; WASM32-FAST-MVP: .functype zext_i8_i32 (i32) -> (i32)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.load8_u $push2=, 0($0)
-; WASM32-FAST-MVP-NEXT: i32.const $push0=, 255
-; WASM32-FAST-MVP-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM32-FAST-MVP-NEXT: return $pop1
+; WASM32-FAST-MVP-NEXT: i32.load8_u $push0=, 0($0)
+; WASM32-FAST-MVP-NEXT: return $pop0
;
; WASM64-DAG-LABEL: zext_i8_i32:
; WASM64-DAG: .functype zext_i8_i32 (i64) -> (i32)
@@ -406,18 +402,14 @@ define i32 @zext_i8_i32(ptr %p) {
; WASM64-FAST-LABEL: zext_i8_i32:
; WASM64-FAST: .functype zext_i8_i32 (i64) -> (i32)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i32.load8_u $push2=, 0($0)
-; WASM64-FAST-NEXT: i32.const $push0=, 255
-; WASM64-FAST-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM64-FAST-NEXT: return $pop1
+; WASM64-FAST-NEXT: i32.load8_u $push0=, 0($0)
+; WASM64-FAST-NEXT: return $pop0
;
; WASM64-FAST-MVP-LABEL: zext_i8_i32:
; WASM64-FAST-MVP: .functype zext_i8_i32 (i64) -> (i32)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i32.load8_u $push2=, 0($0)
-; WASM64-FAST-MVP-NEXT: i32.const $push0=, 255
-; WASM64-FAST-MVP-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM64-FAST-MVP-NEXT: return $pop1
+; WASM64-FAST-MVP-NEXT: i32.load8_u $push0=, 0($0)
+; WASM64-FAST-MVP-NEXT: return $pop0
%v = load i8, ptr %p
%e = zext i8 %v to i32
ret i32 %e
@@ -500,18 +492,14 @@ define i32 @zext_i16_i32(ptr %p) {
; WASM32-FAST-LABEL: zext_i16_i32:
; WASM32-FAST: .functype zext_i16_i32 (i32) -> (i32)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load16_u $push2=, 0($0)
-; WASM32-FAST-NEXT: i32.const $push0=, 65535
-; WASM32-FAST-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i32.load16_u $push0=, 0($0)
+; WASM32-FAST-NEXT: return $pop0
;
; WASM32-FAST-MVP-LABEL: zext_i16_i32:
; WASM32-FAST-MVP: .functype zext_i16_i32 (i32) -> (i32)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.load16_u $push2=, 0($0)
-; WASM32-FAST-MVP-NEXT: i32.const $push0=, 65535
-; WASM32-FAST-MVP-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM32-FAST-MVP-NEXT: return $pop1
+; WASM32-FAST-MVP-NEXT: i32.load16_u $push0=, 0($0)
+; WASM32-FAST-MVP-NEXT: return $pop0
;
; WASM64-DAG-LABEL: zext_i16_i32:
; WASM64-DAG: .functype zext_i16_i32 (i64) -> (i32)
@@ -528,18 +516,14 @@ define i32 @zext_i16_i32(ptr %p) {
; WASM64-FAST-LABEL: zext_i16_i32:
; WASM64-FAST: .functype zext_i16_i32 (i64) -> (i32)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i32.load16_u $push2=, 0($0)
-; WASM64-FAST-NEXT: i32.const $push0=, 65535
-; WASM64-FAST-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM64-FAST-NEXT: return $pop1
+; WASM64-FAST-NEXT: i32.load16_u $push0=, 0($0)
+; WASM64-FAST-NEXT: return $pop0
;
; WASM64-FAST-MVP-LABEL: zext_i16_i32:
; WASM64-FAST-MVP: .functype zext_i16_i32 (i64) -> (i32)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i32.load16_u $push2=, 0($0)
-; WASM64-FAST-MVP-NEXT: i32.const $push0=, 65535
-; WASM64-FAST-MVP-NEXT: i32.and $push1=, $pop2, $pop0
-; WASM64-FAST-MVP-NEXT: return $pop1
+; WASM64-FAST-MVP-NEXT: i32.load16_u $push0=, 0($0)
+; WASM64-FAST-MVP-NEXT: return $pop0
%v = load i16, ptr %p
%e = zext i16 %v to i32
ret i32 %e
@@ -628,20 +612,16 @@ define i64 @zext_i8_i64(ptr %p) {
; WASM32-FAST-LABEL: zext_i8_i64:
; WASM32-FAST: .functype zext_i8_i64 (i32) -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load8_u $push3=, 0($0)
-; WASM32-FAST-NEXT: i32.const $push0=, 255
-; WASM32-FAST-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM32-FAST-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM32-FAST-NEXT: return $pop2
+; WASM32-FAST-NEXT: i32.load8_u $push0=, 0($0)
+; WASM32-FAST-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM32-FAST-NEXT: return $pop1
;
; WASM32-FAST-MVP-LABEL: zext_i8_i64:
; WASM32-FAST-MVP: .functype zext_i8_i64 (i32) -> (i64)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.load8_u $push3=, 0($0)
-; WASM32-FAST-MVP-NEXT: i32.const $push0=, 255
-; WASM32-FAST-MVP-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM32-FAST-MVP-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM32-FAST-MVP-NEXT: return $pop2
+; WASM32-FAST-MVP-NEXT: i32.load8_u $push0=, 0($0)
+; WASM32-FAST-MVP-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM32-FAST-MVP-NEXT: return $pop1
;
; WASM64-DAG-LABEL: zext_i8_i64:
; WASM64-DAG: .functype zext_i8_i64 (i64) -> (i64)
@@ -658,20 +638,16 @@ define i64 @zext_i8_i64(ptr %p) {
; WASM64-FAST-LABEL: zext_i8_i64:
; WASM64-FAST: .functype zext_i8_i64 (i64) -> (i64)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i32.load8_u $push3=, 0($0)
-; WASM64-FAST-NEXT: i32.const $push0=, 255
-; WASM64-FAST-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM64-FAST-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM64-FAST-NEXT: return $pop2
+; WASM64-FAST-NEXT: i32.load8_u $push0=, 0($0)
+; WASM64-FAST-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM64-FAST-NEXT: return $pop1
;
; WASM64-FAST-MVP-LABEL: zext_i8_i64:
; WASM64-FAST-MVP: .functype zext_i8_i64 (i64) -> (i64)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i32.load8_u $push3=, 0($0)
-; WASM64-FAST-MVP-NEXT: i32.const $push0=, 255
-; WASM64-FAST-MVP-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM64-FAST-MVP-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM64-FAST-MVP-NEXT: return $pop2
+; WASM64-FAST-MVP-NEXT: i32.load8_u $push0=, 0($0)
+; WASM64-FAST-MVP-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM64-FAST-MVP-NEXT: return $pop1
%v = load i8, ptr %p
%e = zext i8 %v to i64
ret i64 %e
@@ -760,20 +736,16 @@ define i64 @zext_i16_i64(ptr %p) {
; WASM32-FAST-LABEL: zext_i16_i64:
; WASM32-FAST: .functype zext_i16_i64 (i32) -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load16_u $push3=, 0($0)
-; WASM32-FAST-NEXT: i32.const $push0=, 65535
-; WASM32-FAST-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM32-FAST-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM32-FAST-NEXT: return $pop2
+; WASM32-FAST-NEXT: i32.load16_u $push0=, 0($0)
+; WASM32-FAST-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM32-FAST-NEXT: return $pop1
;
; WASM32-FAST-MVP-LABEL: zext_i16_i64:
; WASM32-FAST-MVP: .functype zext_i16_i64 (i32) -> (i64)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.load16_u $push3=, 0($0)
-; WASM32-FAST-MVP-NEXT: i32.const $push0=, 65535
-; WASM32-FAST-MVP-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM32-FAST-MVP-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM32-FAST-MVP-NEXT: return $pop2
+; WASM32-FAST-MVP-NEXT: i32.load16_u $push0=, 0($0)
+; WASM32-FAST-MVP-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM32-FAST-MVP-NEXT: return $pop1
;
; WASM64-DAG-LABEL: zext_i16_i64:
; WASM64-DAG: .functype zext_i16_i64 (i64) -> (i64)
@@ -790,20 +762,16 @@ define i64 @zext_i16_i64(ptr %p) {
; WASM64-FAST-LABEL: zext_i16_i64:
; WASM64-FAST: .functype zext_i16_i64 (i64) -> (i64)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i32.load16_u $push3=, 0($0)
-; WASM64-FAST-NEXT: i32.const $push0=, 65535
-; WASM64-FAST-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM64-FAST-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM64-FAST-NEXT: return $pop2
+; WASM64-FAST-NEXT: i32.load16_u $push0=, 0($0)
+; WASM64-FAST-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM64-FAST-NEXT: return $pop1
;
; WASM64-FAST-MVP-LABEL: zext_i16_i64:
; WASM64-FAST-MVP: .functype zext_i16_i64 (i64) -> (i64)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i32.load16_u $push3=, 0($0)
-; WASM64-FAST-MVP-NEXT: i32.const $push0=, 65535
-; WASM64-FAST-MVP-NEXT: i32.and $push1=, $pop3, $pop0
-; WASM64-FAST-MVP-NEXT: i64.extend_i32_u $push2=, $pop1
-; WASM64-FAST-MVP-NEXT: return $pop2
+; WASM64-FAST-MVP-NEXT: i32.load16_u $push0=, 0($0)
+; WASM64-FAST-MVP-NEXT: i64.extend_i32_u $push1=, $pop0
+; WASM64-FAST-MVP-NEXT: return $pop1
%v = load i16, ptr %p
%e = zext i16 %v to i64
ret i64 %e
diff --git a/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll b/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll
index cb68a2029fa9e..e24e9954de9bb 100644
--- a/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll
@@ -104,9 +104,7 @@ define i32 @load_i8_u_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_i8_u_with_folded_offset:
; CHECK: .functype load_i8_u_with_folded_offset (i32) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.load8_u $push2=, 24($0)
-; CHECK-NEXT: i32.const $push0=, 255
-; CHECK-NEXT: i32.and $push1=, $pop2, $pop0
+; CHECK-NEXT: i32.load8_u $push0=, 24($0)
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint ptr %p to i32
%r = add nuw i32 %q, 24
More information about the llvm-commits mailing list