[llvm] 7fcee5f - [WebAssembly] Add support for avgr_u in loops (#153252)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 09:52:53 PDT 2025
Author: Jasmine Tang
Date: 2025-08-22T09:52:49-07:00
New Revision: 7fcee5fe082bc80864734b382215e8f0ee3563d5
URL: https://github.com/llvm/llvm-project/commit/7fcee5fe082bc80864734b382215e8f0ee3563d5
DIFF: https://github.com/llvm/llvm-project/commit/7fcee5fe082bc80864734b382215e8f0ee3563d5.diff
LOG: [WebAssembly] Add support for avgr_u in loops (#153252)
Fixes https://github.com/llvm/llvm-project/issues/150550.
With the test case
```
void f(unsigned char *x, unsigned char *y, int n) {
// should have been vectorized into avgr_u instead of a separate vectorized add and logical right shift
for (int i = 0; i < n; i++)
x[i] = (x[i] + y[i] + 1) / 2;
}
```
the backend failed to recognize that this can be reduced to avgr_u, since
the loop vectorizer does not produce the form matched by the existing
tablegen pattern.
This PR marks ISD::AVGCEILU as legal for v8i16 and v16i8 and selects it as
avgr_u in the tablegen file.
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-arith.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 35d5c3ed90c91..c6c2d0cfccb64 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -246,6 +246,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
MVT::v2f64})
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+ setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
+
// Custom lowering since wasm shifts must have a scalar shift amount
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 143298b700928..f06f8d5174e3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1144,6 +1144,9 @@ def : Pat<(wasm_shr_u
(vec.splat (i32 1))),
(i32 1)),
(inst $lhs, $rhs)>;
+
+def : Pat<(vec.vt (avgceilu (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (inst $lhs, $rhs)>;
}
// Widening dot product: i32x4.dot_i16x8_s
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 36637e1d555bd..324a0c49fb413 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -1451,6 +1451,547 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
ret <16 x i8> %c
}
+define <16 x i8> @avgr_u_v16i8_zext(<16 x i8> %x, <16 x i8> %y) {
+; SIMD128-LABEL: avgr_u_v16i8_zext:
+; SIMD128: .functype avgr_u_v16i8_zext (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: i8x16.avgr_u $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: avgr_u_v16i8_zext:
+; SIMD128-FAST: .functype avgr_u_v16i8_zext (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: avgr_u_v16i8_zext:
+; NO-SIMD128: .functype avgr_u_v16i8_zext (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.const $push0=, 255
+; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
+; NO-SIMD128-NEXT: i32.const $push143=, 255
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop143
+; NO-SIMD128-NEXT: i32.add $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.const $push4=, 1
+; NO-SIMD128-NEXT: i32.add $push5=, $pop3, $pop4
+; NO-SIMD128-NEXT: i32.const $push142=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop142
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push141=, 255
+; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop141
+; NO-SIMD128-NEXT: i32.const $push140=, 255
+; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop140
+; NO-SIMD128-NEXT: i32.add $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.const $push139=, 1
+; NO-SIMD128-NEXT: i32.add $push10=, $pop9, $pop139
+; NO-SIMD128-NEXT: i32.const $push138=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push11=, $pop10, $pop138
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop11
+; NO-SIMD128-NEXT: i32.const $push137=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop137
+; NO-SIMD128-NEXT: i32.const $push136=, 255
+; NO-SIMD128-NEXT: i32.and $push12=, $30, $pop136
+; NO-SIMD128-NEXT: i32.add $push14=, $pop13, $pop12
+; NO-SIMD128-NEXT: i32.const $push135=, 1
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop135
+; NO-SIMD128-NEXT: i32.const $push134=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $pop134
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push133=, 255
+; NO-SIMD128-NEXT: i32.and $push18=, $13, $pop133
+; NO-SIMD128-NEXT: i32.const $push132=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $29, $pop132
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.const $push131=, 1
+; NO-SIMD128-NEXT: i32.add $push20=, $pop19, $pop131
+; NO-SIMD128-NEXT: i32.const $push130=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop130
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push129=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop129
+; NO-SIMD128-NEXT: i32.const $push128=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $28, $pop128
+; NO-SIMD128-NEXT: i32.add $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.const $push127=, 1
+; NO-SIMD128-NEXT: i32.add $push25=, $pop24, $pop127
+; NO-SIMD128-NEXT: i32.const $push126=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $pop126
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop26
+; NO-SIMD128-NEXT: i32.const $push125=, 255
+; NO-SIMD128-NEXT: i32.and $push28=, $11, $pop125
+; NO-SIMD128-NEXT: i32.const $push124=, 255
+; NO-SIMD128-NEXT: i32.and $push27=, $27, $pop124
+; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop27
+; NO-SIMD128-NEXT: i32.const $push123=, 1
+; NO-SIMD128-NEXT: i32.add $push30=, $pop29, $pop123
+; NO-SIMD128-NEXT: i32.const $push122=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop122
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop31
+; NO-SIMD128-NEXT: i32.const $push121=, 255
+; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop121
+; NO-SIMD128-NEXT: i32.const $push120=, 255
+; NO-SIMD128-NEXT: i32.and $push32=, $26, $pop120
+; NO-SIMD128-NEXT: i32.add $push34=, $pop33, $pop32
+; NO-SIMD128-NEXT: i32.const $push119=, 1
+; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop119
+; NO-SIMD128-NEXT: i32.const $push118=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop118
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push117=, 255
+; NO-SIMD128-NEXT: i32.and $push38=, $9, $pop117
+; NO-SIMD128-NEXT: i32.const $push116=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $25, $pop116
+; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.const $push115=, 1
+; NO-SIMD128-NEXT: i32.add $push40=, $pop39, $pop115
+; NO-SIMD128-NEXT: i32.const $push114=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop114
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop41
+; NO-SIMD128-NEXT: i32.const $push113=, 255
+; NO-SIMD128-NEXT: i32.and $push43=, $8, $pop113
+; NO-SIMD128-NEXT: i32.const $push112=, 255
+; NO-SIMD128-NEXT: i32.and $push42=, $24, $pop112
+; NO-SIMD128-NEXT: i32.add $push44=, $pop43, $pop42
+; NO-SIMD128-NEXT: i32.const $push111=, 1
+; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop111
+; NO-SIMD128-NEXT: i32.const $push110=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push46=, $pop45, $pop110
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop46
+; NO-SIMD128-NEXT: i32.const $push109=, 255
+; NO-SIMD128-NEXT: i32.and $push48=, $7, $pop109
+; NO-SIMD128-NEXT: i32.const $push108=, 255
+; NO-SIMD128-NEXT: i32.and $push47=, $23, $pop108
+; NO-SIMD128-NEXT: i32.add $push49=, $pop48, $pop47
+; NO-SIMD128-NEXT: i32.const $push107=, 1
+; NO-SIMD128-NEXT: i32.add $push50=, $pop49, $pop107
+; NO-SIMD128-NEXT: i32.const $push106=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push51=, $pop50, $pop106
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop51
+; NO-SIMD128-NEXT: i32.const $push105=, 255
+; NO-SIMD128-NEXT: i32.and $push53=, $6, $pop105
+; NO-SIMD128-NEXT: i32.const $push104=, 255
+; NO-SIMD128-NEXT: i32.and $push52=, $22, $pop104
+; NO-SIMD128-NEXT: i32.add $push54=, $pop53, $pop52
+; NO-SIMD128-NEXT: i32.const $push103=, 1
+; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop103
+; NO-SIMD128-NEXT: i32.const $push102=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push56=, $pop55, $pop102
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop56
+; NO-SIMD128-NEXT: i32.const $push101=, 255
+; NO-SIMD128-NEXT: i32.and $push58=, $5, $pop101
+; NO-SIMD128-NEXT: i32.const $push100=, 255
+; NO-SIMD128-NEXT: i32.and $push57=, $21, $pop100
+; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop57
+; NO-SIMD128-NEXT: i32.const $push99=, 1
+; NO-SIMD128-NEXT: i32.add $push60=, $pop59, $pop99
+; NO-SIMD128-NEXT: i32.const $push98=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop98
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop61
+; NO-SIMD128-NEXT: i32.const $push97=, 255
+; NO-SIMD128-NEXT: i32.and $push63=, $4, $pop97
+; NO-SIMD128-NEXT: i32.const $push96=, 255
+; NO-SIMD128-NEXT: i32.and $push62=, $20, $pop96
+; NO-SIMD128-NEXT: i32.add $push64=, $pop63, $pop62
+; NO-SIMD128-NEXT: i32.const $push95=, 1
+; NO-SIMD128-NEXT: i32.add $push65=, $pop64, $pop95
+; NO-SIMD128-NEXT: i32.const $push94=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push66=, $pop65, $pop94
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop66
+; NO-SIMD128-NEXT: i32.const $push93=, 255
+; NO-SIMD128-NEXT: i32.and $push68=, $3, $pop93
+; NO-SIMD128-NEXT: i32.const $push92=, 255
+; NO-SIMD128-NEXT: i32.and $push67=, $19, $pop92
+; NO-SIMD128-NEXT: i32.add $push69=, $pop68, $pop67
+; NO-SIMD128-NEXT: i32.const $push91=, 1
+; NO-SIMD128-NEXT: i32.add $push70=, $pop69, $pop91
+; NO-SIMD128-NEXT: i32.const $push90=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push71=, $pop70, $pop90
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop71
+; NO-SIMD128-NEXT: i32.const $push89=, 255
+; NO-SIMD128-NEXT: i32.and $push73=, $2, $pop89
+; NO-SIMD128-NEXT: i32.const $push88=, 255
+; NO-SIMD128-NEXT: i32.and $push72=, $18, $pop88
+; NO-SIMD128-NEXT: i32.add $push74=, $pop73, $pop72
+; NO-SIMD128-NEXT: i32.const $push87=, 1
+; NO-SIMD128-NEXT: i32.add $push75=, $pop74, $pop87
+; NO-SIMD128-NEXT: i32.const $push86=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push76=, $pop75, $pop86
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop76
+; NO-SIMD128-NEXT: i32.const $push85=, 255
+; NO-SIMD128-NEXT: i32.and $push78=, $1, $pop85
+; NO-SIMD128-NEXT: i32.const $push84=, 255
+; NO-SIMD128-NEXT: i32.and $push77=, $17, $pop84
+; NO-SIMD128-NEXT: i32.add $push79=, $pop78, $pop77
+; NO-SIMD128-NEXT: i32.const $push83=, 1
+; NO-SIMD128-NEXT: i32.add $push80=, $pop79, $pop83
+; NO-SIMD128-NEXT: i32.const $push82=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push81=, $pop80, $pop82
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop81
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: avgr_u_v16i8_zext:
+; NO-SIMD128-FAST: .functype avgr_u_v16i8_zext (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
+; NO-SIMD128-FAST-NEXT: i32.const $push143=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop143
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $pop2, $pop1
+; NO-SIMD128-FAST-NEXT: i32.const $push4=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push142=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop142
+; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push141=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $2, $pop141
+; NO-SIMD128-FAST-NEXT: i32.const $push140=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop140
+; NO-SIMD128-FAST-NEXT: i32.add $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push139=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push10=, $pop9, $pop139
+; NO-SIMD128-FAST-NEXT: i32.const $push138=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push11=, $pop10, $pop138
+; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.const $push137=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $3, $pop137
+; NO-SIMD128-FAST-NEXT: i32.const $push136=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $19, $pop136
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push135=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop135
+; NO-SIMD128-FAST-NEXT: i32.const $push134=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $pop134
+; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push133=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $4, $pop133
+; NO-SIMD128-FAST-NEXT: i32.const $push132=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $20, $pop132
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.const $push131=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push20=, $pop19, $pop131
+; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop130
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push129=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $5, $pop129
+; NO-SIMD128-FAST-NEXT: i32.const $push128=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $21, $pop128
+; NO-SIMD128-FAST-NEXT: i32.add $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push25=, $pop24, $pop127
+; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $pop126
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push125=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $6, $pop125
+; NO-SIMD128-FAST-NEXT: i32.const $push124=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $22, $pop124
+; NO-SIMD128-FAST-NEXT: i32.add $push29=, $pop28, $pop27
+; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $pop29, $pop123
+; NO-SIMD128-FAST-NEXT: i32.const $push122=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push31=, $pop30, $pop122
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push121=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push33=, $7, $pop121
+; NO-SIMD128-FAST-NEXT: i32.const $push120=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $23, $pop120
+; NO-SIMD128-FAST-NEXT: i32.add $push34=, $pop33, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push119=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop119
+; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop118
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push38=, $8, $pop117
+; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop116
+; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push40=, $pop39, $pop115
+; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop114
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop41
+; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push43=, $9, $pop113
+; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push42=, $25, $pop112
+; NO-SIMD128-FAST-NEXT: i32.add $push44=, $pop43, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push45=, $pop44, $pop111
+; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push46=, $pop45, $pop110
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push48=, $10, $pop109
+; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push47=, $26, $pop108
+; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push50=, $pop49, $pop107
+; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push53=, $11, $pop105
+; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push52=, $27, $pop104
+; NO-SIMD128-FAST-NEXT: i32.add $push54=, $pop53, $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop103
+; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push56=, $pop55, $pop102
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop56
+; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop100
+; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop57
+; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push60=, $pop59, $pop99
+; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop98
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop61
+; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $pop97
+; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push62=, $29, $pop96
+; NO-SIMD128-FAST-NEXT: i32.add $push64=, $pop63, $pop62
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push65=, $pop64, $pop95
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push66=, $pop65, $pop94
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop66
+; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push68=, $14, $pop93
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push67=, $30, $pop92
+; NO-SIMD128-FAST-NEXT: i32.add $push69=, $pop68, $pop67
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push70=, $pop69, $pop91
+; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push71=, $pop70, $pop90
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop71
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push73=, $15, $pop89
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push72=, $31, $pop88
+; NO-SIMD128-FAST-NEXT: i32.add $push74=, $pop73, $pop72
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push75=, $pop74, $pop87
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push76=, $pop75, $pop86
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop76
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push78=, $16, $pop85
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push77=, $32, $pop84
+; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop77
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push80=, $pop79, $pop83
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop82
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop81
+; NO-SIMD128-FAST-NEXT: return
+ %x.zext = zext <16 x i8> %x to <16 x i16>
+ %y.zext = zext <16 x i8> %y to <16 x i16>
+ %a = add <16 x i16> %x.zext, %y.zext
+ %b = add <16 x i16> %a, splat (i16 1)
+ %c = lshr <16 x i16> %b, splat (i16 1)
+ %c.trunc = trunc <16 x i16> %c to <16 x i8>
+ ret <16 x i8> %c.trunc
+}
+
+define <8 x i16> @avgr_u_v8i16_zext(<8 x i16> %x, <8 x i16> %y) {
+; SIMD128-LABEL: avgr_u_v8i16_zext:
+; SIMD128: .functype avgr_u_v8i16_zext (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: i16x8.avgr_u $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: avgr_u_v8i16_zext:
+; SIMD128-FAST: .functype avgr_u_v8i16_zext (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: i16x8.avgr_u $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: avgr_u_v8i16_zext:
+; NO-SIMD128: .functype avgr_u_v8i16_zext (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.const $push0=, 65535
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
+; NO-SIMD128-NEXT: i32.const $push71=, 65535
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop71
+; NO-SIMD128-NEXT: i32.add $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.const $push4=, 1
+; NO-SIMD128-NEXT: i32.add $push5=, $pop3, $pop4
+; NO-SIMD128-NEXT: i32.const $push70=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop70
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push69=, 65535
+; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop69
+; NO-SIMD128-NEXT: i32.const $push68=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop68
+; NO-SIMD128-NEXT: i32.add $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.const $push67=, 1
+; NO-SIMD128-NEXT: i32.add $push10=, $pop9, $pop67
+; NO-SIMD128-NEXT: i32.const $push66=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push11=, $pop10, $pop66
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop11
+; NO-SIMD128-NEXT: i32.const $push65=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $6, $pop65
+; NO-SIMD128-NEXT: i32.const $push64=, 65535
+; NO-SIMD128-NEXT: i32.and $push12=, $14, $pop64
+; NO-SIMD128-NEXT: i32.add $push14=, $pop13, $pop12
+; NO-SIMD128-NEXT: i32.const $push63=, 1
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop63
+; NO-SIMD128-NEXT: i32.const $push62=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $pop62
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push61=, 65535
+; NO-SIMD128-NEXT: i32.and $push18=, $5, $pop61
+; NO-SIMD128-NEXT: i32.const $push60=, 65535
+; NO-SIMD128-NEXT: i32.and $push17=, $13, $pop60
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.const $push59=, 1
+; NO-SIMD128-NEXT: i32.add $push20=, $pop19, $pop59
+; NO-SIMD128-NEXT: i32.const $push58=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop58
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push57=, 65535
+; NO-SIMD128-NEXT: i32.and $push23=, $4, $pop57
+; NO-SIMD128-NEXT: i32.const $push56=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $12, $pop56
+; NO-SIMD128-NEXT: i32.add $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.const $push55=, 1
+; NO-SIMD128-NEXT: i32.add $push25=, $pop24, $pop55
+; NO-SIMD128-NEXT: i32.const $push54=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $pop54
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop26
+; NO-SIMD128-NEXT: i32.const $push53=, 65535
+; NO-SIMD128-NEXT: i32.and $push28=, $3, $pop53
+; NO-SIMD128-NEXT: i32.const $push52=, 65535
+; NO-SIMD128-NEXT: i32.and $push27=, $11, $pop52
+; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop27
+; NO-SIMD128-NEXT: i32.const $push51=, 1
+; NO-SIMD128-NEXT: i32.add $push30=, $pop29, $pop51
+; NO-SIMD128-NEXT: i32.const $push50=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop50
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop31
+; NO-SIMD128-NEXT: i32.const $push49=, 65535
+; NO-SIMD128-NEXT: i32.and $push33=, $2, $pop49
+; NO-SIMD128-NEXT: i32.const $push48=, 65535
+; NO-SIMD128-NEXT: i32.and $push32=, $10, $pop48
+; NO-SIMD128-NEXT: i32.add $push34=, $pop33, $pop32
+; NO-SIMD128-NEXT: i32.const $push47=, 1
+; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47
+; NO-SIMD128-NEXT: i32.const $push46=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop46
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push45=, 65535
+; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop45
+; NO-SIMD128-NEXT: i32.const $push44=, 65535
+; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop44
+; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.const $push43=, 1
+; NO-SIMD128-NEXT: i32.add $push40=, $pop39, $pop43
+; NO-SIMD128-NEXT: i32.const $push42=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop41
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_zext:
+; NO-SIMD128-FAST: .functype avgr_u_v8i16_zext (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop71
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $pop2, $pop1
+; NO-SIMD128-FAST-NEXT: i32.const $push4=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop70
+; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $2, $pop69
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $10, $pop68
+; NO-SIMD128-FAST-NEXT: i32.add $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push10=, $pop9, $pop67
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push11=, $pop10, $pop66
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $3, $pop65
+; NO-SIMD128-FAST-NEXT: i32.const $push64=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $11, $pop64
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop63
+; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $pop62
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $4, $pop61
+; NO-SIMD128-FAST-NEXT: i32.const $push60=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $12, $pop60
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push20=, $pop19, $pop59
+; NO-SIMD128-FAST-NEXT: i32.const $push58=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop58
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push57=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $5, $pop57
+; NO-SIMD128-FAST-NEXT: i32.const $push56=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $13, $pop56
+; NO-SIMD128-FAST-NEXT: i32.add $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push25=, $pop24, $pop55
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $pop54
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $6, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $14, $pop52
+; NO-SIMD128-FAST-NEXT: i32.add $push29=, $pop28, $pop27
+; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $pop29, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push31=, $pop30, $pop50
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push33=, $7, $pop49
+; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $15, $pop48
+; NO-SIMD128-FAST-NEXT: i32.add $push34=, $pop33, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push38=, $8, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $pop44
+; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push40=, $pop39, $pop43
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop41
+; NO-SIMD128-FAST-NEXT: return
+ %x.zext = zext <8 x i16> %x to <8 x i32>
+ %y.zext = zext <8 x i16> %y to <8 x i32>
+ %a = add <8 x i32> %x.zext, %y.zext
+ %b = add <8 x i32> %a, splat (i32 1)
+ %c = lshr <8 x i32> %b, splat (i32 1)
+ %c.trunc = trunc <8 x i32> %c to <8 x i16>
+ ret <8 x i16> %c.trunc
+}
define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
; SIMD128-LABEL: avgr_u_v16i8_wrap:
; SIMD128: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128)
More information about the llvm-commits
mailing list