[llvm] 3b485a6 - [AArch64] Mark known zero for high 16-bits of uaddlv intrinsic output with v8i8
Jingu Kang via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 24 03:11:54 PDT 2023
Author: Jingu Kang
Date: 2023-08-24T10:55:19+01:00
New Revision: 3b485a6622cdf3a454a95c73614ba3cdc7299c01
URL: https://github.com/llvm/llvm-project/commit/3b485a6622cdf3a454a95c73614ba3cdc7299c01
DIFF: https://github.com/llvm/llvm-project/commit/3b485a6622cdf3a454a95c73614ba3cdc7299c01.diff
LOG: [AArch64] Mark known zero for high 16-bits of uaddlv intrinsic output with v8i8
The uaddlv intrinsic with a v8i8 operand produces a 16-bit value, but clang
emits it as a 32-bit intrinsic followed by a trunc. In this case, we can mark
the high 16 bits of the intrinsic output as known zero.
Differential Revision:
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/neon-addlv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64fc4a118f92d0..1ccd9d3a73da0d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2162,6 +2162,16 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
switch (IntNo) {
default:
break;
+ case Intrinsic::aarch64_neon_uaddlv: {
+ MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
+ unsigned BitWidth = Known.getBitWidth();
+ if (VT == MVT::v8i8) {
+ assert(BitWidth >= 16 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+ Known.Zero |= Mask;
+ }
+ break;
+ }
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
diff --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index d6d9884a750672..86cb0f6a37eb43 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -150,3 +150,16 @@ define i32 @saddlv4h_from_v4i16(ptr %A) nounwind {
%tmp5 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %tmp3)
ret i32 %tmp5
}
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) nounwind readnone
+
+define i32 @uaddlv_known_bits(<8 x i8> %a) {
+; CHECK-LABEL: uaddlv_known_bits:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddlv h0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %tmp1 = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+ %tmp2 = and i32 %tmp1, 65535
+ ret i32 %tmp2
+}
More information about the llvm-commits
mailing list