[llvm] c0ecbfa - [AArch64] Known bits for AArch64ISD::DUP
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 20 11:12:02 PDT 2022
Author: David Green
Date: 2022-06-20T19:11:57+01:00
New Revision: c0ecbfa4fdf0906745679c1262b08dc4cdbb5537
URL: https://github.com/llvm/llvm-project/commit/c0ecbfa4fdf0906745679c1262b08dc4cdbb5537
DIFF: https://github.com/llvm/llvm-project/commit/c0ecbfa4fdf0906745679c1262b08dc4cdbb5537.diff
LOG: [AArch64] Known bits for AArch64ISD::DUP
An AArch64ISD::DUP is just a splat, where the known bits of each lane
are the same as the known bits of the scalar input. This patch teaches
that fact to computeKnownBitsForTargetNode (a small sketch of the rule follows).
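For intuition, here is a minimal standalone sketch of that rule (plain C++, not
LLVM's KnownBits/APInt machinery; ToyKnownBits and dupKnownBits are made-up
names). The known bits of the scalar operand are computed first and then
truncated to the lane width, since a DUP from a 32-bit GPR into 16-bit lanes
implicitly truncates:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal stand-in for known bits: a bit is known if it is set in exactly
    // one of the two masks.
    struct ToyKnownBits {
      uint64_t Zero; // bits known to be 0
      uint64_t One;  // bits known to be 1
      unsigned Bits; // width in bits
    };

    // Known bits of every lane of DUP(Src), with LaneBits-wide lanes.
    ToyKnownBits dupKnownBits(ToyKnownBits Src, unsigned LaneBits) {
      assert(LaneBits <= Src.Bits && "DUP may only implicitly truncate");
      uint64_t LaneMask = LaneBits == 64 ? ~0ULL : (1ULL << LaneBits) - 1;
      return {Src.Zero & LaneMask, Src.One & LaneMask, LaneBits};
    }

    int main() {
      // A 32-bit scalar known to be exactly 13294 (0x33EE), as in the
      // widen_f16_build_vector test updated below.
      ToyKnownBits W8 = {~13294ULL & 0xFFFFFFFF, 13294, 32};
      // dup.4h v0, w8: every 16-bit lane is known to be 0x33EE, so the whole
      // vector is a known constant and the store can use a plain scalar str.
      ToyKnownBits Lane = dupKnownBits(W8, 16);
      printf("known one 0x%llx, known zero 0x%llx\n",
             (unsigned long long)Lane.One, (unsigned long long)Lane.Zero);
    }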
Problems arise for constants, though: a constant BUILD_VECTOR can be
lowered to an AArch64ISD::DUP, which SimplifyDemandedBits would then
turn back into a constant BUILD_VECTOR, leading to an infinite cycle.
This is prevented by adding an isTargetCanonicalConstantNode hook that
blocks the conversion back into a BUILD_VECTOR.
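As a toy illustration of the cycle being avoided (made-up names, not
SelectionDAG code): lowering rewrites a constant splat BUILD_VECTOR into a DUP,
and constant folding would rewrite an all-bits-known DUP back into a
BUILD_VECTOR; the hook tells the folder to leave the target-canonical form
alone, so combining reaches a fixed point:

    #include <cstdio>

    enum class Kind { BuildVectorConst, Dup };
    struct Node { Kind K; unsigned SplatVal; };

    // Target lowering: a constant splat BUILD_VECTOR becomes a DUP of a scalar.
    Node lowerBuildVector(Node N) {
      return N.K == Kind::BuildVectorConst ? Node{Kind::Dup, N.SplatVal} : N;
    }

    // SimplifyDemandedBits: a node whose demanded bits are all known is folded
    // to a constant BUILD_VECTOR -- unless the target says the node is already
    // a canonical constant form, as the new hook reports for AArch64ISD::DUP.
    Node simplifyDemandedBits(Node N, bool IsTargetCanonicalConstantNode) {
      if (N.K == Kind::Dup && !IsTargetCanonicalConstantNode)
        return {Kind::BuildVectorConst, N.SplatVal};
      return N;
    }

    int main() {
      Node N = {Kind::BuildVectorConst, 16500};
      // Without the hook each round undoes the other, an endless ping-pong;
      // with it, the DUP is left alone and combining reaches a fixed point.
      for (int Round = 0; Round < 3; ++Round) {
        N = lowerBuildVector(N);
        N = simplifyDemandedBits(N, /*IsTargetCanonicalConstantNode=*/true);
        printf("round %d: %s\n", Round,
               N.K == Kind::Dup ? "DUP" : "BUILD_VECTOR");
      }
    }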
Differential Revision: https://reviews.llvm.org/D128144
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/arm64-build-vector.ll
llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbfd284b31ca2..484fd2dec4a10 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3785,6 +3785,12 @@ class TargetLowering : public TargetLoweringBase {
APInt &UndefElts,
unsigned Depth = 0) const;
+ /// Returns true if the given Opc is considered a canonical constant for the
+ /// target, which should not be transformed back into a BUILD_VECTOR.
+ virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
+ return Op.getOpcode() == ISD::SPLAT_VECTOR;
+ }
+
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 20deb39885c1a..a6b471ea22b78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2588,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ if (!isTargetCanonicalConstantNode(Op) &&
+ DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ed7c0905d742..bec4d7919aac8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1805,11 +1805,21 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op, KnownBits &Known,
- const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+ const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
+ case AArch64ISD::DUP: {
+ SDValue SrcOp = Op.getOperand(0);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
+ if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
+ assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
+ "Expected DUP implicit truncation");
+ Known = Known.trunc(Op.getScalarValueSizeInBits());
+ }
+ break;
+ }
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
@@ -14858,6 +14868,10 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
bool IsStrict = N0->isStrictFPOpcode();
+ // extract(dup x) -> x
+ if (N0.getOpcode() == AArch64ISD::DUP)
+ return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 746732c865e8c..4c64684cbcd85 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1129,6 +1129,11 @@ class AArch64TargetLowering : public TargetLowering {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ bool isTargetCanonicalConstantNode(SDValue Op) const override {
+ return Op.getOpcode() == AArch64ISD::DUP ||
+ TargetLowering::isTargetCanonicalConstantNode(Op);
+ }
+
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64bit and 128bit vectors as well.
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 4998574f07c60..0dc369c90761f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(half* %addr) {
; CHECK-LABEL: widen_f16_build_vector:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #13294
-; CHECK-NEXT: dup.4h v0, w8
-; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: movk w8, #13294, lsl #16
+; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
%1 = bitcast half* %addr to <2 x half>*
store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %1, align 2
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 870190807b0d9..0e3a29caba413 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -164,10 +164,9 @@ define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest
; CHECK-LABEL: testRightBad4x16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16500
-; CHECK-NEXT: ushr.4h v1, v1, #14
; CHECK-NEXT: dup.4h v2, w8
; CHECK-NEXT: and.8b v0, v0, v2
-; CHECK-NEXT: orr.8b v0, v0, v1
+; CHECK-NEXT: usra.4h v0, v1, #14
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
@@ -224,10 +223,9 @@ define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest
; CHECK-LABEL: testRightBad8x16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16500
-; CHECK-NEXT: ushr.8h v1, v1, #14
; CHECK-NEXT: dup.8h v2, w8
; CHECK-NEXT: and.16b v0, v0, v2
-; CHECK-NEXT: orr.16b v0, v0, v1
+; CHECK-NEXT: usra.8h v0, v1, #14
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>