[llvm] [WebAssembly] Add support for avgr_u in loops (PR #153252)
Jasmine Tang via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 14:32:11 PDT 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/153252
>From 4a82aea026ede56dfe5010fb4310fec61cf41d63 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Tue, 12 Aug 2025 11:53:15 -0700
Subject: [PATCH 1/3] Precommit test for avgr pattern
---
llvm/test/CodeGen/WebAssembly/simd-avgr.ll | 230 +++++++++++++++++++++
1 file changed, 230 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/simd-avgr.ll
diff --git a/llvm/test/CodeGen/WebAssembly/simd-avgr.ll b/llvm/test/CodeGen/WebAssembly/simd-avgr.ll
new file mode 100644
index 0000000000000..ac49821a57966
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-avgr.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O2 -mtriple=wasm32 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+
+;void f(unsigned char *x, unsigned char *y, int n) {
+; for (int i = 0; i < n; i++)
+; x[i] = (x[i] + y[i] + 1) / 2;
+;}
+
+define void @f(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: f:
+; CHECK: .functype f (i32, i32, i32) -> ()
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: block
+; CHECK-NEXT: i32.const $push0=, 1
+; CHECK-NEXT: i32.lt_s $push1=, $2, $pop0
+; CHECK-NEXT: br_if 0, $pop1 # 0: down to label0
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: i32.const $5=, 0
+; CHECK-NEXT: block
+; CHECK-NEXT: i32.const $push2=, 16
+; CHECK-NEXT: i32.lt_u $push3=, $2, $pop2
+; CHECK-NEXT: br_if 0, $pop3 # 0: down to label1
+; CHECK-NEXT: # %bb.2: # %vector.memcheck
+; CHECK-NEXT: block
+; CHECK-NEXT: i32.add $push5=, $1, $2
+; CHECK-NEXT: i32.ge_u $push6=, $0, $pop5
+; CHECK-NEXT: br_if 0, $pop6 # 0: down to label2
+; CHECK-NEXT: # %bb.3: # %vector.memcheck
+; CHECK-NEXT: i32.add $push4=, $0, $2
+; CHECK-NEXT: i32.lt_u $push7=, $1, $pop4
+; CHECK-NEXT: br_if 1, $pop7 # 1: down to label1
+; CHECK-NEXT: .LBB0_4: # %vector.ph
+; CHECK-NEXT: end_block # label2:
+; CHECK-NEXT: local.copy $6=, $0
+; CHECK-NEXT: local.copy $7=, $1
+; CHECK-NEXT: i32.const $push8=, 2147483632
+; CHECK-NEXT: i32.and $push34=, $2, $pop8
+; CHECK-NEXT: local.tee $push33=, $5=, $pop34
+; CHECK-NEXT: local.copy $8=, $pop33
+; CHECK-NEXT: .LBB0_5: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: loop # label3:
+; CHECK-NEXT: v128.load $push44=, 0($6):p2align=0
+; CHECK-NEXT: local.tee $push43=, $4=, $pop44
+; CHECK-NEXT: v128.load $push42=, 0($7):p2align=0
+; CHECK-NEXT: local.tee $push41=, $3=, $pop42
+; CHECK-NEXT: v128.or $push9=, $pop43, $pop41
+; CHECK-NEXT: v128.xor $push10=, $4, $3
+; CHECK-NEXT: i32.const $push40=, 1
+; CHECK-NEXT: i8x16.shr_u $push11=, $pop10, $pop40
+; CHECK-NEXT: i8x16.sub $push12=, $pop9, $pop11
+; CHECK-NEXT: v128.store 0($6):p2align=0, $pop12
+; CHECK-NEXT: i32.const $push39=, 16
+; CHECK-NEXT: i32.add $6=, $6, $pop39
+; CHECK-NEXT: i32.const $push38=, 16
+; CHECK-NEXT: i32.add $7=, $7, $pop38
+; CHECK-NEXT: i32.const $push37=, -16
+; CHECK-NEXT: i32.add $push36=, $8, $pop37
+; CHECK-NEXT: local.tee $push35=, $8=, $pop36
+; CHECK-NEXT: br_if 0, $pop35 # 0: up to label3
+; CHECK-NEXT: # %bb.6: # %middle.block
+; CHECK-NEXT: end_loop
+; CHECK-NEXT: i32.eq $push13=, $2, $5
+; CHECK-NEXT: br_if 1, $pop13 # 1: down to label0
+; CHECK-NEXT: .LBB0_7: # %for.body.preheader16
+; CHECK-NEXT: end_block # label1:
+; CHECK-NEXT: i32.const $push46=, 1
+; CHECK-NEXT: i32.or $6=, $5, $pop46
+; CHECK-NEXT: block
+; CHECK-NEXT: i32.const $push45=, 1
+; CHECK-NEXT: i32.and $push14=, $2, $pop45
+; CHECK-NEXT: i32.eqz $push64=, $pop14
+; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4
+; CHECK-NEXT: # %bb.8: # %for.body.prol
+; CHECK-NEXT: i32.add $push50=, $0, $5
+; CHECK-NEXT: local.tee $push49=, $7=, $pop50
+; CHECK-NEXT: i32.load8_u $push17=, 0($7)
+; CHECK-NEXT: i32.add $push15=, $1, $5
+; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
+; CHECK-NEXT: i32.add $push18=, $pop17, $pop16
+; CHECK-NEXT: i32.const $push48=, 1
+; CHECK-NEXT: i32.add $push19=, $pop18, $pop48
+; CHECK-NEXT: i32.const $push47=, 1
+; CHECK-NEXT: i32.shr_u $push20=, $pop19, $pop47
+; CHECK-NEXT: i32.store8 0($pop49), $pop20
+; CHECK-NEXT: local.copy $5=, $6
+; CHECK-NEXT: .LBB0_9: # %for.body.prol.loopexit
+; CHECK-NEXT: end_block # label4:
+; CHECK-NEXT: i32.eq $push21=, $2, $6
+; CHECK-NEXT: br_if 0, $pop21 # 0: down to label0
+; CHECK-NEXT: # %bb.10: # %for.body.preheader1
+; CHECK-NEXT: i32.add $6=, $0, $5
+; CHECK-NEXT: i32.add $7=, $1, $5
+; CHECK-NEXT: i32.sub $8=, $2, $5
+; CHECK-NEXT: .LBB0_11: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: loop # label5:
+; CHECK-NEXT: i32.load8_u $push23=, 0($6)
+; CHECK-NEXT: i32.load8_u $push22=, 0($7)
+; CHECK-NEXT: i32.add $push24=, $pop23, $pop22
+; CHECK-NEXT: i32.const $push63=, 1
+; CHECK-NEXT: i32.add $push25=, $pop24, $pop63
+; CHECK-NEXT: i32.const $push62=, 1
+; CHECK-NEXT: i32.shr_u $push26=, $pop25, $pop62
+; CHECK-NEXT: i32.store8 0($6), $pop26
+; CHECK-NEXT: i32.const $push61=, 1
+; CHECK-NEXT: i32.add $push60=, $6, $pop61
+; CHECK-NEXT: local.tee $push59=, $2=, $pop60
+; CHECK-NEXT: i32.load8_u $push27=, 0($2)
+; CHECK-NEXT: i32.const $push58=, 1
+; CHECK-NEXT: i32.add $push28=, $7, $pop58
+; CHECK-NEXT: i32.load8_u $push29=, 0($pop28)
+; CHECK-NEXT: i32.add $push30=, $pop27, $pop29
+; CHECK-NEXT: i32.const $push57=, 1
+; CHECK-NEXT: i32.add $push31=, $pop30, $pop57
+; CHECK-NEXT: i32.const $push56=, 1
+; CHECK-NEXT: i32.shr_u $push32=, $pop31, $pop56
+; CHECK-NEXT: i32.store8 0($pop59), $pop32
+; CHECK-NEXT: i32.const $push55=, 2
+; CHECK-NEXT: i32.add $6=, $6, $pop55
+; CHECK-NEXT: i32.const $push54=, 2
+; CHECK-NEXT: i32.add $7=, $7, $pop54
+; CHECK-NEXT: i32.const $push53=, -2
+; CHECK-NEXT: i32.add $push52=, $8, $pop53
+; CHECK-NEXT: local.tee $push51=, $8=, $pop52
+; CHECK-NEXT: br_if 0, $pop51 # 0: up to label5
+; CHECK-NEXT: .LBB0_12: # %for.cond.cleanup
+; CHECK-NEXT: end_loop
+; CHECK-NEXT: end_block # label0:
+; CHECK-NEXT: return
+entry:
+ %cmp12 = icmp sgt i32 %n, 0
+ br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ %min.iters.check = icmp ult i32 %n, 16
+ br i1 %min.iters.check, label %for.body.preheader16, label %vector.memcheck
+
+vector.memcheck:
+ %scevgep = getelementptr i8, ptr %x, i32 %n
+ %scevgep14 = getelementptr i8, ptr %y, i32 %n
+ %bound0 = icmp ult ptr %x, %scevgep14
+ %bound1 = icmp ult ptr %y, %scevgep
+ %found.conflict = and i1 %bound0, %bound1
+ br i1 %found.conflict, label %for.body.preheader16, label %vector.ph
+
+vector.ph:
+ %n.vec = and i32 %n, 2147483632
+ br label %vector.body
+
+vector.body:
+ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds nuw i8, ptr %x, i32 %index
+ %wide.load = load <16 x i8>, ptr %0, align 1
+ %1 = zext <16 x i8> %wide.load to <16 x i16>
+ %2 = getelementptr inbounds nuw i8, ptr %y, i32 %index
+ %wide.load15 = load <16 x i8>, ptr %2, align 1
+ %3 = zext <16 x i8> %wide.load15 to <16 x i16>
+ %4 = add nuw nsw <16 x i16> %1, splat (i16 1)
+ %5 = add nuw nsw <16 x i16> %4, %3
+ %6 = lshr <16 x i16> %5, splat (i16 1)
+ %7 = trunc nuw <16 x i16> %6 to <16 x i8>
+ store <16 x i8> %7, ptr %0, align 1
+ %index.next = add nuw i32 %index, 16
+ %8 = icmp eq i32 %index.next, %n.vec
+ br i1 %8, label %middle.block, label %vector.body
+
+middle.block:
+ %cmp.n = icmp eq i32 %n, %n.vec
+ br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader16
+
+for.body.preheader16:
+ %i.013.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+ %.neg = or disjoint i32 %i.013.ph, 1
+ %xtraiter = and i32 %n, 1
+ %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+ br i1 %lcmp.mod.not, label %for.body.prol.loopexit, label %for.body.prol
+
+for.body.prol:
+ %arrayidx.prol = getelementptr inbounds nuw i8, ptr %x, i32 %i.013.ph
+ %9 = load i8, ptr %arrayidx.prol, align 1
+ %conv.prol = zext i8 %9 to i16
+ %arrayidx1.prol = getelementptr inbounds nuw i8, ptr %y, i32 %i.013.ph
+ %10 = load i8, ptr %arrayidx1.prol, align 1
+ %conv2.prol = zext i8 %10 to i16
+ %add.prol = add nuw nsw i16 %conv.prol, 1
+ %add3.prol = add nuw nsw i16 %add.prol, %conv2.prol
+ %div11.prol = lshr i16 %add3.prol, 1
+ %conv4.prol = trunc nuw i16 %div11.prol to i8
+ store i8 %conv4.prol, ptr %arrayidx.prol, align 1
+ %inc.prol = or disjoint i32 %i.013.ph, 1
+ br label %for.body.prol.loopexit
+
+for.body.prol.loopexit:
+ %i.013.unr = phi i32 [ %i.013.ph, %for.body.preheader16 ], [ %inc.prol, %for.body.prol ]
+ %11 = icmp eq i32 %n, %.neg
+ br i1 %11, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.013 = phi i32 [ %inc.1, %for.body ], [ %i.013.unr, %for.body.prol.loopexit ]
+ %arrayidx = getelementptr inbounds nuw i8, ptr %x, i32 %i.013
+ %12 = load i8, ptr %arrayidx, align 1
+ %conv = zext i8 %12 to i16
+ %arrayidx1 = getelementptr inbounds nuw i8, ptr %y, i32 %i.013
+ %13 = load i8, ptr %arrayidx1, align 1
+ %conv2 = zext i8 %13 to i16
+ %add = add nuw nsw i16 %conv, 1
+ %add3 = add nuw nsw i16 %add, %conv2
+ %div11 = lshr i16 %add3, 1
+ %conv4 = trunc nuw i16 %div11 to i8
+ store i8 %conv4, ptr %arrayidx, align 1
+ %inc = add nuw nsw i32 %i.013, 1
+ %arrayidx.1 = getelementptr inbounds nuw i8, ptr %x, i32 %inc
+ %14 = load i8, ptr %arrayidx.1, align 1
+ %conv.1 = zext i8 %14 to i16
+ %arrayidx1.1 = getelementptr inbounds nuw i8, ptr %y, i32 %inc
+ %15 = load i8, ptr %arrayidx1.1, align 1
+ %conv2.1 = zext i8 %15 to i16
+ %add.1 = add nuw nsw i16 %conv.1, 1
+ %add3.1 = add nuw nsw i16 %add.1, %conv2.1
+ %div11.1 = lshr i16 %add3.1, 1
+ %conv4.1 = trunc nuw i16 %div11.1 to i8
+ store i8 %conv4.1, ptr %arrayidx.1, align 1
+ %inc.1 = add nuw nsw i32 %i.013, 2
+ %exitcond.not.1 = icmp eq i32 %inc.1, %n
+ br i1 %exitcond.not.1, label %for.cond.cleanup, label %for.body
+}
>From fe2ee69d6882aefd0e70e94c38e31991fdec57bd Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Tue, 12 Aug 2025 12:00:31 -0700
Subject: [PATCH 2/3] Support avgr_u in loop construct
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 4 +
.../WebAssembly/WebAssemblyInstrSIMD.td | 3 +
llvm/test/CodeGen/WebAssembly/simd-avgr.ll | 144 +++++++++---------
3 files changed, 76 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3f80b2ab2bd6d..4299313c28802 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -246,6 +246,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
MVT::v2f64})
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+ // Set avgceilu as legal for i8x16 and i16x8
+ // and isel will convert to AVGR_U w/ tablegen
+ setOperationAction({ISD::AVGCEILU}, {MVT::v8i16, MVT::v16i8}, Legal);
+
// Custom lowering since wasm shifts must have a scalar shift amount
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 143298b700928..fb508e3dc9a7f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1144,6 +1144,9 @@ def : Pat<(wasm_shr_u
(vec.splat (i32 1))),
(i32 1)),
(inst $lhs, $rhs)>;
+
+def : Pat<(vec.vt(avgceilu(vec.vt V128:$lhs), (vec.vt V128:$rhs))), (inst $lhs,
+ $rhs)>;
}
// Widening dot product: i32x4.dot_i16x8_s
diff --git a/llvm/test/CodeGen/WebAssembly/simd-avgr.ll b/llvm/test/CodeGen/WebAssembly/simd-avgr.ll
index ac49821a57966..c4e6b12244e9b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-avgr.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-avgr.ll
@@ -15,7 +15,7 @@ define void @f(ptr %x, ptr %y, i32 %n) {
; CHECK-NEXT: i32.lt_s $push1=, $2, $pop0
; CHECK-NEXT: br_if 0, $pop1 # 0: down to label0
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: i32.const $5=, 0
+; CHECK-NEXT: i32.const $3=, 0
; CHECK-NEXT: block
; CHECK-NEXT: i32.const $push2=, 16
; CHECK-NEXT: i32.lt_u $push3=, $2, $pop2
@@ -31,99 +31,93 @@ define void @f(ptr %x, ptr %y, i32 %n) {
; CHECK-NEXT: br_if 1, $pop7 # 1: down to label1
; CHECK-NEXT: .LBB0_4: # %vector.ph
; CHECK-NEXT: end_block # label2:
-; CHECK-NEXT: local.copy $6=, $0
-; CHECK-NEXT: local.copy $7=, $1
+; CHECK-NEXT: local.copy $4=, $0
+; CHECK-NEXT: local.copy $5=, $1
; CHECK-NEXT: i32.const $push8=, 2147483632
-; CHECK-NEXT: i32.and $push34=, $2, $pop8
-; CHECK-NEXT: local.tee $push33=, $5=, $pop34
-; CHECK-NEXT: local.copy $8=, $pop33
+; CHECK-NEXT: i32.and $push33=, $2, $pop8
+; CHECK-NEXT: local.tee $push32=, $3=, $pop33
+; CHECK-NEXT: local.copy $6=, $pop32
; CHECK-NEXT: .LBB0_5: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label3:
-; CHECK-NEXT: v128.load $push44=, 0($6):p2align=0
-; CHECK-NEXT: local.tee $push43=, $4=, $pop44
-; CHECK-NEXT: v128.load $push42=, 0($7):p2align=0
-; CHECK-NEXT: local.tee $push41=, $3=, $pop42
-; CHECK-NEXT: v128.or $push9=, $pop43, $pop41
-; CHECK-NEXT: v128.xor $push10=, $4, $3
-; CHECK-NEXT: i32.const $push40=, 1
-; CHECK-NEXT: i8x16.shr_u $push11=, $pop10, $pop40
-; CHECK-NEXT: i8x16.sub $push12=, $pop9, $pop11
-; CHECK-NEXT: v128.store 0($6):p2align=0, $pop12
-; CHECK-NEXT: i32.const $push39=, 16
-; CHECK-NEXT: i32.add $6=, $6, $pop39
+; CHECK-NEXT: v128.load $push10=, 0($4):p2align=0
+; CHECK-NEXT: v128.load $push9=, 0($5):p2align=0
+; CHECK-NEXT: i8x16.avgr_u $push11=, $pop10, $pop9
+; CHECK-NEXT: v128.store 0($4):p2align=0, $pop11
; CHECK-NEXT: i32.const $push38=, 16
-; CHECK-NEXT: i32.add $7=, $7, $pop38
-; CHECK-NEXT: i32.const $push37=, -16
-; CHECK-NEXT: i32.add $push36=, $8, $pop37
-; CHECK-NEXT: local.tee $push35=, $8=, $pop36
-; CHECK-NEXT: br_if 0, $pop35 # 0: up to label3
+; CHECK-NEXT: i32.add $4=, $4, $pop38
+; CHECK-NEXT: i32.const $push37=, 16
+; CHECK-NEXT: i32.add $5=, $5, $pop37
+; CHECK-NEXT: i32.const $push36=, -16
+; CHECK-NEXT: i32.add $push35=, $6, $pop36
+; CHECK-NEXT: local.tee $push34=, $6=, $pop35
+; CHECK-NEXT: br_if 0, $pop34 # 0: up to label3
; CHECK-NEXT: # %bb.6: # %middle.block
; CHECK-NEXT: end_loop
-; CHECK-NEXT: i32.eq $push13=, $2, $5
-; CHECK-NEXT: br_if 1, $pop13 # 1: down to label0
+; CHECK-NEXT: i32.eq $push12=, $2, $3
+; CHECK-NEXT: br_if 1, $pop12 # 1: down to label0
; CHECK-NEXT: .LBB0_7: # %for.body.preheader16
; CHECK-NEXT: end_block # label1:
-; CHECK-NEXT: i32.const $push46=, 1
-; CHECK-NEXT: i32.or $6=, $5, $pop46
+; CHECK-NEXT: i32.const $push40=, 1
+; CHECK-NEXT: i32.or $4=, $3, $pop40
; CHECK-NEXT: block
-; CHECK-NEXT: i32.const $push45=, 1
-; CHECK-NEXT: i32.and $push14=, $2, $pop45
-; CHECK-NEXT: i32.eqz $push64=, $pop14
-; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4
+; CHECK-NEXT: i32.const $push39=, 1
+; CHECK-NEXT: i32.and $push13=, $2, $pop39
+; CHECK-NEXT: i32.eqz $push58=, $pop13
+; CHECK-NEXT: br_if 0, $pop58 # 0: down to label4
; CHECK-NEXT: # %bb.8: # %for.body.prol
-; CHECK-NEXT: i32.add $push50=, $0, $5
-; CHECK-NEXT: local.tee $push49=, $7=, $pop50
-; CHECK-NEXT: i32.load8_u $push17=, 0($7)
-; CHECK-NEXT: i32.add $push15=, $1, $5
-; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
-; CHECK-NEXT: i32.add $push18=, $pop17, $pop16
-; CHECK-NEXT: i32.const $push48=, 1
-; CHECK-NEXT: i32.add $push19=, $pop18, $pop48
-; CHECK-NEXT: i32.const $push47=, 1
-; CHECK-NEXT: i32.shr_u $push20=, $pop19, $pop47
-; CHECK-NEXT: i32.store8 0($pop49), $pop20
-; CHECK-NEXT: local.copy $5=, $6
+; CHECK-NEXT: i32.add $push44=, $0, $3
+; CHECK-NEXT: local.tee $push43=, $5=, $pop44
+; CHECK-NEXT: i32.load8_u $push16=, 0($5)
+; CHECK-NEXT: i32.add $push14=, $1, $3
+; CHECK-NEXT: i32.load8_u $push15=, 0($pop14)
+; CHECK-NEXT: i32.add $push17=, $pop16, $pop15
+; CHECK-NEXT: i32.const $push42=, 1
+; CHECK-NEXT: i32.add $push18=, $pop17, $pop42
+; CHECK-NEXT: i32.const $push41=, 1
+; CHECK-NEXT: i32.shr_u $push19=, $pop18, $pop41
+; CHECK-NEXT: i32.store8 0($pop43), $pop19
+; CHECK-NEXT: local.copy $3=, $4
; CHECK-NEXT: .LBB0_9: # %for.body.prol.loopexit
; CHECK-NEXT: end_block # label4:
-; CHECK-NEXT: i32.eq $push21=, $2, $6
-; CHECK-NEXT: br_if 0, $pop21 # 0: down to label0
+; CHECK-NEXT: i32.eq $push20=, $2, $4
+; CHECK-NEXT: br_if 0, $pop20 # 0: down to label0
; CHECK-NEXT: # %bb.10: # %for.body.preheader1
-; CHECK-NEXT: i32.add $6=, $0, $5
-; CHECK-NEXT: i32.add $7=, $1, $5
-; CHECK-NEXT: i32.sub $8=, $2, $5
+; CHECK-NEXT: i32.add $4=, $0, $3
+; CHECK-NEXT: i32.add $5=, $1, $3
+; CHECK-NEXT: i32.sub $6=, $2, $3
; CHECK-NEXT: .LBB0_11: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label5:
-; CHECK-NEXT: i32.load8_u $push23=, 0($6)
-; CHECK-NEXT: i32.load8_u $push22=, 0($7)
-; CHECK-NEXT: i32.add $push24=, $pop23, $pop22
-; CHECK-NEXT: i32.const $push63=, 1
-; CHECK-NEXT: i32.add $push25=, $pop24, $pop63
-; CHECK-NEXT: i32.const $push62=, 1
-; CHECK-NEXT: i32.shr_u $push26=, $pop25, $pop62
-; CHECK-NEXT: i32.store8 0($6), $pop26
-; CHECK-NEXT: i32.const $push61=, 1
-; CHECK-NEXT: i32.add $push60=, $6, $pop61
-; CHECK-NEXT: local.tee $push59=, $2=, $pop60
-; CHECK-NEXT: i32.load8_u $push27=, 0($2)
-; CHECK-NEXT: i32.const $push58=, 1
-; CHECK-NEXT: i32.add $push28=, $7, $pop58
-; CHECK-NEXT: i32.load8_u $push29=, 0($pop28)
-; CHECK-NEXT: i32.add $push30=, $pop27, $pop29
+; CHECK-NEXT: i32.load8_u $push22=, 0($4)
+; CHECK-NEXT: i32.load8_u $push21=, 0($5)
+; CHECK-NEXT: i32.add $push23=, $pop22, $pop21
; CHECK-NEXT: i32.const $push57=, 1
-; CHECK-NEXT: i32.add $push31=, $pop30, $pop57
+; CHECK-NEXT: i32.add $push24=, $pop23, $pop57
; CHECK-NEXT: i32.const $push56=, 1
-; CHECK-NEXT: i32.shr_u $push32=, $pop31, $pop56
-; CHECK-NEXT: i32.store8 0($pop59), $pop32
-; CHECK-NEXT: i32.const $push55=, 2
-; CHECK-NEXT: i32.add $6=, $6, $pop55
-; CHECK-NEXT: i32.const $push54=, 2
-; CHECK-NEXT: i32.add $7=, $7, $pop54
-; CHECK-NEXT: i32.const $push53=, -2
-; CHECK-NEXT: i32.add $push52=, $8, $pop53
-; CHECK-NEXT: local.tee $push51=, $8=, $pop52
-; CHECK-NEXT: br_if 0, $pop51 # 0: up to label5
+; CHECK-NEXT: i32.shr_u $push25=, $pop24, $pop56
+; CHECK-NEXT: i32.store8 0($4), $pop25
+; CHECK-NEXT: i32.const $push55=, 1
+; CHECK-NEXT: i32.add $push54=, $4, $pop55
+; CHECK-NEXT: local.tee $push53=, $2=, $pop54
+; CHECK-NEXT: i32.load8_u $push26=, 0($2)
+; CHECK-NEXT: i32.const $push52=, 1
+; CHECK-NEXT: i32.add $push27=, $5, $pop52
+; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
+; CHECK-NEXT: i32.add $push29=, $pop26, $pop28
+; CHECK-NEXT: i32.const $push51=, 1
+; CHECK-NEXT: i32.add $push30=, $pop29, $pop51
+; CHECK-NEXT: i32.const $push50=, 1
+; CHECK-NEXT: i32.shr_u $push31=, $pop30, $pop50
+; CHECK-NEXT: i32.store8 0($pop53), $pop31
+; CHECK-NEXT: i32.const $push49=, 2
+; CHECK-NEXT: i32.add $4=, $4, $pop49
+; CHECK-NEXT: i32.const $push48=, 2
+; CHECK-NEXT: i32.add $5=, $5, $pop48
+; CHECK-NEXT: i32.const $push47=, -2
+; CHECK-NEXT: i32.add $push46=, $6, $pop47
+; CHECK-NEXT: local.tee $push45=, $6=, $pop46
+; CHECK-NEXT: br_if 0, $pop45 # 0: up to label5
; CHECK-NEXT: .LBB0_12: # %for.cond.cleanup
; CHECK-NEXT: end_loop
; CHECK-NEXT: end_block # label0:
>From b2345ea959e4a37a0fecf505a80a26a337361c8b Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Thu, 14 Aug 2025 14:15:51 -0700
Subject: [PATCH 3/3] Address PR reviews
---
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4299313c28802..dfe02cfed346b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -246,9 +246,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
MVT::v2f64})
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
- // Set avgceilu as legal for i8x16 and i16x8
- // and isel will convert to AVGR_U w/ tablegen
- setOperationAction({ISD::AVGCEILU}, {MVT::v8i16, MVT::v16i8}, Legal);
+ setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
// Custom lowering since wasm shifts must have a scalar shift amount
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
More information about the llvm-commits mailing list