[llvm] e44646b - [WebAssembly] Lower ANY_EXTEND_VECTOR_INREG (#167529)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 00:57:12 PST 2025
Author: Sam Parker
Date: 2025-11-20T08:57:08Z
New Revision: e44646b79594006c9dc7deda6a9ae447243bd9e3
URL: https://github.com/llvm/llvm-project/commit/e44646b79594006c9dc7deda6a9ae447243bd9e3
DIFF: https://github.com/llvm/llvm-project/commit/e44646b79594006c9dc7deda6a9ae447243bd9e3.diff
LOG: [WebAssembly] Lower ANY_EXTEND_VECTOR_INREG (#167529)
Treat it in the same manner as zero_extend_vector_inreg and generate an
extend_low_u if possible. This is to try to prevent expensive shuffles
from being generated instead. computeKnownBitsForTargetNode has also
been updated to specify known zeros on extend_low_u.
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/test/CodeGen/WebAssembly/simd-arith.ll
llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index abd8b2e095ae1..98cb7aba562c4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -319,6 +319,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Support vector extending
for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
@@ -1136,7 +1137,27 @@ void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
}
break;
}
-
+ case WebAssemblyISD::EXTEND_LOW_U:
+ case WebAssemblyISD::EXTEND_HIGH_U: {
+ // We know the high half, of each destination vector element, will be zero.
+ SDValue SrcOp = Op.getOperand(0);
+ EVT VT = SrcOp.getSimpleValueType();
+ unsigned BitWidth = Known.getBitWidth();
+ if (VT == MVT::v8i8 || VT == MVT::v16i8) {
+ assert(BitWidth >= 8 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
+ Known.Zero |= Mask;
+ } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
+ assert(BitWidth >= 16 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+ Known.Zero |= Mask;
+ } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
+ assert(BitWidth >= 32 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 32);
+ Known.Zero |= Mask;
+ }
+ break;
+ }
// For 128-bit addition if the upper bits are all zero then it's known that
// the upper bits of the result will have all bits guaranteed zero except the
// first.
@@ -1705,6 +1726,7 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
@@ -2299,6 +2321,9 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
unsigned Ext;
switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode");
+ case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Ext = WebAssemblyISD::EXTEND_LOW_U;
break;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index d698fad745dfb..60b4a837f7c31 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -1997,38 +1997,30 @@ define void @avgr_undef_shuffle_lanes(ptr %res, <8 x i8> %a, <8 x i8> %b, <8 x i
; SIMD128: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> ()
; SIMD128-NEXT: # %bb.0:
; SIMD128-NEXT: i8x16.avgr_u $push1=, $1, $2
-; SIMD128-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-NEXT: local.tee $push11=, $2=, $pop12
+; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1
+; SIMD128-NEXT: local.tee $push7=, $2=, $pop8
; SIMD128-NEXT: i8x16.avgr_u $push0=, $3, $4
-; SIMD128-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-NEXT: local.tee $push9=, $4=, $pop10
-; SIMD128-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
-; SIMD128-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255
-; SIMD128-NEXT: local.tee $push7=, $3=, $pop8
-; SIMD128-NEXT: v128.and $push5=, $pop4, $pop7
+; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0
+; SIMD128-NEXT: local.tee $push5=, $4=, $pop6
+; SIMD128-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
; SIMD128-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31
-; SIMD128-NEXT: v128.and $push3=, $pop2, $3
-; SIMD128-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3
-; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop6
+; SIMD128-NEXT: i8x16.narrow_i16x8_u $push4=, $pop3, $pop2
+; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop4
; SIMD128-NEXT: return
;
; SIMD128-FAST-LABEL: avgr_undef_shuffle_lanes:
; SIMD128-FAST: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> ()
; SIMD128-FAST-NEXT: # %bb.0:
; SIMD128-FAST-NEXT: i8x16.avgr_u $push1=, $1, $2
-; SIMD128-FAST-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-FAST-NEXT: local.tee $push11=, $2=, $pop12
+; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1
+; SIMD128-FAST-NEXT: local.tee $push7=, $2=, $pop8
; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $3, $4
-; SIMD128-FAST-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-FAST-NEXT: local.tee $push9=, $4=, $pop10
-; SIMD128-FAST-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
-; SIMD128-FAST-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255
-; SIMD128-FAST-NEXT: local.tee $push7=, $3=, $pop8
-; SIMD128-FAST-NEXT: v128.and $push5=, $pop4, $pop7
+; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0
+; SIMD128-FAST-NEXT: local.tee $push5=, $4=, $pop6
+; SIMD128-FAST-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
; SIMD128-FAST-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31
-; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $3
-; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3
-; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop6
+; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push4=, $pop3, $pop2
+; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop4
; SIMD128-FAST-NEXT: return
;
; NO-SIMD128-LABEL: avgr_undef_shuffle_lanes:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index f7143711394fa..70c6baf2be005 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -276,7 +276,7 @@ define i1 @test_any_v8i8(<8 x i8> %x) {
; CHECK-LABEL: test_any_v8i8:
; CHECK: .functype test_any_v8i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
+; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1
; CHECK-NEXT: i32.const $push5=, 15
@@ -292,7 +292,7 @@ define i1 @test_all_v8i8(<8 x i8> %x) {
; CHECK-LABEL: test_all_v8i8:
; CHECK: .functype test_all_v8i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
+; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1
; CHECK-NEXT: i32.const $push5=, 15
More information about the llvm-commits
mailing list