[llvm] 3b485a6 - [AArch64] Mark known zero for high 16-bits of uaddlv intrinsic output with v8i8

Thu Aug 24 03:11:54 PDT 2023

Author: Jingu Kang
Date: 2023-08-24T10:55:19+01:00
New Revision: 3b485a6622cdf3a454a95c73614ba3cdc7299c01

URL: https://github.com/llvm/llvm-project/commit/3b485a6622cdf3a454a95c73614ba3cdc7299c01
DIFF: https://github.com/llvm/llvm-project/commit/3b485a6622cdf3a454a95c73614ba3cdc7299c01.diff

LOG: [AArch64] Mark known zero for high 16-bits of uaddlv intrinsic output with v8i8

For a v8i8 operand, uaddlv produces a 16-bit result, but clang emits the 32-bit
form of the intrinsic followed by a trunc. In that case the high 16 bits of the
intrinsic's output are known to be zero, so mark them as such in
computeKnownBitsForTargetNode.

Differential Revision:

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/neon-addlv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64fc4a118f92d0..1ccd9d3a73da0d 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2162,6 +2162,16 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_neon_uaddlv: {
+      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
+      unsigned BitWidth = Known.getBitWidth();
+      if (VT == MVT::v8i8) {
+        assert(BitWidth >= 16 && "Unexpected width!");
+        APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+        Known.Zero |= Mask;
+      }
+      break;
+    }
     case Intrinsic::aarch64_neon_umaxv:
     case Intrinsic::aarch64_neon_uminv: {
       // Figure out the datatype of the vector operand. The UMINV instruction

diff  --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index d6d9884a750672..86cb0f6a37eb43 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -150,3 +150,16 @@ define i32 @saddlv4h_from_v4i16(ptr %A) nounwind {
   %tmp5 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %tmp3)
   ret i32 %tmp5
 }
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) nounwind readnone
+
+define i32 @uaddlv_known_bits(<8 x i8> %a) {
+; CHECK-LABEL: uaddlv_known_bits:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+  %tmp1 = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %tmp2 = and i32 %tmp1, 65535
+  ret i32 %tmp2
+}