[llvm] aa63949 - [WebAssembly] Avoid dot for v16i8 partial_smla (#163796)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 01:12:03 PDT 2025
Author: Sam Parker
Date: 2025-10-20T09:12:00+01:00
New Revision: aa63949428b3e3052e06214ce97eeee84d48567b
URL: https://github.com/llvm/llvm-project/commit/aa63949428b3e3052e06214ce97eeee84d48567b
DIFF: https://github.com/llvm/llvm-project/commit/aa63949428b3e3052e06214ce97eeee84d48567b.diff
LOG: [WebAssembly] Avoid dot for v16i8 partial_smla (#163796)
The sequence is two operations shorter if we use extmul and
extadd_pairwise instead of dot: the four extends and two dots are
replaced by two extmuls and two extadd_pairwise operations.
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ed54404db9160..784062066ed64 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1583,11 +1583,9 @@ def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v8i16 V128:$lhs),
// MLA: v16i8 -> v4i32
def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
(v16i8 V128:$rhs))),
- (ADD_I32x4 (ADD_I32x4 (DOT (extend_low_s_I16x8 $lhs),
- (extend_low_s_I16x8 $rhs)),
- (DOT (extend_high_s_I16x8 $lhs),
- (extend_high_s_I16x8 $rhs))),
- $acc)>;
+ (ADD_I32x4 (ADD_I32x4 (extadd_pairwise_s_I32x4 (EXTMUL_LOW_S_I16x8 $lhs, $rhs)),
+ (extadd_pairwise_s_I32x4 (EXTMUL_HIGH_S_I16x8 $lhs, $rhs))),
+ $acc)>;
def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v16i8 V128:$lhs),
(v16i8 V128:$rhs))),
(ADD_I32x4 (ADD_I32x4 (extadd_pairwise_u_I32x4 (EXTMUL_LOW_U_I16x8 $lhs, $rhs)),
diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
index 314e1b4fc69a1..91cd3dd1ca4e7 100644
--- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
+++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
@@ -14,13 +14,11 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n
; CHECK: i32x4.add
; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
-; MAX-BANDWIDTH: i32x4.dot_i16x8_s
-; MAX-BANDWIDTH: i16x8.extend_high_i8x16_s
-; MAX-BANDWIDTH: i16x8.extend_high_i8x16_s
-; MAX-BANDWIDTH: i32x4.dot_i16x8_s
+; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
; MAX-BANDWIDTH: i32x4.add
; MAX-BANDWIDTH: i32x4.add
More information about the llvm-commits
mailing list