[llvm] c0ecbfa - [AArch64] Known bits for AArch64ISD::DUP
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 20 11:12:02 PDT 2022
Author: David Green
Date: 2022-06-20T19:11:57+01:00
New Revision: c0ecbfa4fdf0906745679c1262b08dc4cdbb5537
URL: https://github.com/llvm/llvm-project/commit/c0ecbfa4fdf0906745679c1262b08dc4cdbb5537
DIFF: https://github.com/llvm/llvm-project/commit/c0ecbfa4fdf0906745679c1262b08dc4cdbb5537.diff
LOG: [AArch64] Known bits for AArch64ISD::DUP
An AArch64ISD::DUP is just a splat, where the known bits of each lane
are the same as the known bits of the scalar input. This patch teaches
that fact to computeKnownBitsForTargetNode (a small sketch of the rule follows).
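For intuition, here is a minimal standalone sketch of that rule (plain C++, not
LLVM's KnownBits/APInt machinery; ToyKnownBits and dupKnownBits are made-up
names). The known bits of the scalar operand are computed first and then
truncated to the lane width, since a DUP from a 32-bit GPR into 16-bit lanes
implicitly truncates:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal stand-in for known bits: a bit is known if it is set in exactly
    // one of the two masks.
    struct ToyKnownBits {
      uint64_t Zero; // bits known to be 0
      uint64_t One;  // bits known to be 1
      unsigned Bits; // width in bits
    };

    // Known bits of every lane of DUP(Src), with LaneBits-wide lanes.
    ToyKnownBits dupKnownBits(ToyKnownBits Src, unsigned LaneBits) {
      assert(LaneBits <= Src.Bits && "DUP may only implicitly truncate");
      uint64_t LaneMask = LaneBits == 64 ? ~0ULL : (1ULL << LaneBits) - 1;
      return {Src.Zero & LaneMask, Src.One & LaneMask, LaneBits};
    }

    int main() {
      // A 32-bit scalar known to be exactly 13294 (0x33EE), as in the
      // widen_f16_build_vector test updated below.
      ToyKnownBits W8 = {~13294ULL & 0xFFFFFFFF, 13294, 32};
      // dup.4h v0, w8: every 16-bit lane is known to be 0x33EE, so the whole
      // vector is a known constant and the store can use a plain scalar str.
      ToyKnownBits Lane = dupKnownBits(W8, 16);
      printf("known one 0x%llx, known zero 0x%llx\n",
             (unsigned long long)Lane.One, (unsigned long long)Lane.Zero);
    }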
Problems arise for constants, though: a constant BUILD_VECTOR can be
lowered to an AArch64ISD::DUP, which SimplifyDemandedBits would then
turn back into a constant BUILD_VECTOR, leading to an infinite cycle.
This is prevented by adding an isTargetCanonicalConstantNode hook that
blocks the conversion back into a BUILD_VECTOR.
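As a toy illustration of the cycle being avoided (made-up names, not
SelectionDAG code): lowering rewrites a constant splat BUILD_VECTOR into a DUP,
and constant folding would rewrite an all-bits-known DUP back into a
BUILD_VECTOR; the hook tells the folder to leave the target-canonical form
alone, so combining reaches a fixed point:

    #include <cstdio>

    enum class Kind { BuildVectorConst, Dup };
    struct Node { Kind K; unsigned SplatVal; };

    // Target lowering: a constant splat BUILD_VECTOR becomes a DUP of a scalar.
    Node lowerBuildVector(Node N) {
      return N.K == Kind::BuildVectorConst ? Node{Kind::Dup, N.SplatVal} : N;
    }

    // SimplifyDemandedBits: a node whose demanded bits are all known is folded
    // to a constant BUILD_VECTOR -- unless the target says the node is already
    // a canonical constant form, as the new hook reports for AArch64ISD::DUP.
    Node simplifyDemandedBits(Node N, bool IsTargetCanonicalConstantNode) {
      if (N.K == Kind::Dup && !IsTargetCanonicalConstantNode)
        return {Kind::BuildVectorConst, N.SplatVal};
      return N;
    }

    int main() {
      Node N = {Kind::BuildVectorConst, 16500};
      // Without the hook each round undoes the other, an endless ping-pong;
      // with it, the DUP is left alone and combining reaches a fixed point.
      for (int Round = 0; Round < 3; ++Round) {
        N = lowerBuildVector(N);
        N = simplifyDemandedBits(N, /*IsTargetCanonicalConstantNode=*/true);
        printf("round %d: %s\n", Round,
               N.K == Kind::Dup ? "DUP" : "BUILD_VECTOR");
      }
    }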
Differential Revision: https://reviews.llvm.org/D128144
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/arm64-build-vector.ll
llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbfd284b31ca2..484fd2dec4a10 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3785,6 +3785,12 @@ class TargetLowering : public TargetLoweringBase {
APInt &UndefElts,
unsigned Depth = 0) const;
+ /// Returns true if the given Opc is considered a canonical constant for the
+ /// target, which should not be transformed back into a BUILD_VECTOR.
+ virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
+ return Op.getOpcode() == ISD::SPLAT_VECTOR;
+ }
+
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 20deb39885c1a..a6b471ea22b78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2588,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ if (!isTargetCanonicalConstantNode(Op) &&
+ DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ed7c0905d742..bec4d7919aac8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1805,11 +1805,21 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op, KnownBits &Known,
- const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+ const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
+ case AArch64ISD::DUP: {
+ SDValue SrcOp = Op.getOperand(0);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
+ if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
+ assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
+ "Expected DUP implicit truncation");
+ Known = Known.trunc(Op.getScalarValueSizeInBits());
+ }
+ break;
+ }
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
@@ -14858,6 +14868,10 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
bool IsStrict = N0->isStrictFPOpcode();
+ // extract(dup x) -> x
+ if (N0.getOpcode() == AArch64ISD::DUP)
+ return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 746732c865e8c..4c64684cbcd85 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1129,6 +1129,11 @@ class AArch64TargetLowering : public TargetLowering {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ bool isTargetCanonicalConstantNode(SDValue Op) const override {
+ return Op.getOpcode() == AArch64ISD::DUP ||
+ TargetLowering::isTargetCanonicalConstantNode(Op);
+ }
+
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64bit and 128bit vectors as well.
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 4998574f07c60..0dc369c90761f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(half* %addr) {
; CHECK-LABEL: widen_f16_build_vector:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #13294
-; CHECK-NEXT: dup.4h v0, w8
-; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: movk w8, #13294, lsl #16
+; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
%1 = bitcast half* %addr to <2 x half>*
store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %1, align 2
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 870190807b0d9..0e3a29caba413 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -164,10 +164,9 @@ define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest
; CHECK-LABEL: testRightBad4x16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16500
-; CHECK-NEXT: ushr.4h v1, v1, #14
; CHECK-NEXT: dup.4h v2, w8
; CHECK-NEXT: and.8b v0, v0, v2
-; CHECK-NEXT: orr.8b v0, v0, v1
+; CHECK-NEXT: usra.4h v0, v1, #14
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
@@ -224,10 +223,9 @@ define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest
; CHECK-LABEL: testRightBad8x16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16500
-; CHECK-NEXT: ushr.8h v1, v1, #14
; CHECK-NEXT: dup.8h v2, w8
; CHECK-NEXT: and.16b v0, v0, v2
-; CHECK-NEXT: orr.16b v0, v0, v1
+; CHECK-NEXT: usra.8h v0, v1, #14
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>