[llvm] 61888d9 - [AArch64] Basic demand elements for some intrinsics

Thu Jan 13 03:53:20 PST 2022

Author: David Green
Date: 2022-01-13T11:53:12Z
New Revision: 61888d97f67d98afafc224620ee1f9ecc7d062d0

URL: https://github.com/llvm/llvm-project/commit/61888d97f67d98afafc224620ee1f9ecc7d062d0
DIFF: https://github.com/llvm/llvm-project/commit/61888d97f67d98afafc224620ee1f9ecc7d062d0.diff

LOG: [AArch64] Basic demand elements for some intrinsics

A lot of neon intrinsics work lane-wise, meaning that non-demanded
elements in and not demanded out. This teaches that to
AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic for some simple
single-input truncate intrinsics, which can help remove unnecessary
instructions.

Differential Revision: https://reviews.llvm.org/D117097

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/test/Transforms/InstCombine/AArch64/demandelts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0e8c5b820b919..1e276749045ed 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1168,6 +1168,32 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   return None;
 }
 
+Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
+    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
+    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
+    std::function<void(Instruction *, unsigned, APInt, APInt &)>
+        SimplifyAndSetOp) const {
+  switch (II.getIntrinsicID()) {
+  default:
+    break;
+  case Intrinsic::aarch64_neon_fcvtxn:
+  case Intrinsic::aarch64_neon_rshrn:
+  case Intrinsic::aarch64_neon_sqrshrn:
+  case Intrinsic::aarch64_neon_sqrshrun:
+  case Intrinsic::aarch64_neon_sqshrn:
+  case Intrinsic::aarch64_neon_sqshrun:
+  case Intrinsic::aarch64_neon_sqxtn:
+  case Intrinsic::aarch64_neon_sqxtun:
+  case Intrinsic::aarch64_neon_uqrshrn:
+  case Intrinsic::aarch64_neon_uqshrn:
+  case Intrinsic::aarch64_neon_uqxtn:
+    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
+    break;
+  }
+
+  return None;
+}
+
 bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                            ArrayRef<const Value *> Args) {
 

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c3e1735cd4cd7..7ba645ba1737c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -106,6 +106,12 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                IntrinsicInst &II) const;
 
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+      APInt &UndefElts2, APInt &UndefElts3,
+      std::function<void(Instruction *, unsigned, APInt, APInt &)>
+          SimplifyAndSetOp) const;
+
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
     switch (K) {
     case TargetTransformInfo::RGK_Scalar:

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll b/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll
index 33ccdb19f39a2..c264f24eb8ce4 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll
@@ -3,8 +3,7 @@
 
 define <2 x float> @fcvtxn(<2 x double> %d1) {
 ; CHECK-LABEL: @fcvtxn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x double> [[D1:%.*]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[A]])
+; CHECK-NEXT:    [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[D1:%.*]])
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[I]], <2 x float> undef, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    ret <2 x float> [[S]]
 ;
@@ -16,7 +15,7 @@ define <2 x float> @fcvtxn(<2 x double> %d1) {
 
 define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @rshrn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -29,7 +28,7 @@ define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqrshrn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -42,7 +41,7 @@ define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqrshrun(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -55,7 +54,7 @@ define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqshrn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -68,7 +67,7 @@ define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqshrun(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -81,7 +80,7 @@ define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqxtn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -94,7 +93,7 @@ define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @sqxtun(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -107,7 +106,7 @@ define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @uqrshrn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -120,7 +119,7 @@ define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @uqshrn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[A]], i32 9)
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]
@@ -133,7 +132,7 @@ define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) {
 
 define <4 x i16> @uqxtn(<2 x i32> %d1, <2 x i32> %d2) {
 ; CHECK-LABEL: @uqxtn(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <4 x i16> [[S]]