[llvm] [AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling (PR #76644)
Sizov Nikita via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 03:00:46 PST 2024
https://github.com/snikitav updated https://github.com/llvm/llvm-project/pull/76644
From afa5a78d50434f7ef1219df36811adb5a0e6bcc0 Mon Sep 17 00:00:00 2001
From: Sizov Nikita <s.nikita.v at gmail.com>
Date: Sun, 31 Dec 2023 04:33:21 +0300
Subject: [PATCH] Missing AArch64ISD::BICi handling
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 ++++++----
.../Target/AArch64/AArch64ISelLowering.cpp | 30 +++++++++++++++++++
.../AArch64/aarch64-known-bits-hadd.ll | 4 ---
3 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f7ace79e8c51d4..937e6061bf01b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3416,13 +3416,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::mulhs(Known, Known2);
break;
}
- case ISD::AVGCEILU: {
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGCEILS: {
+ bool IsCeil = Opcode == ISD::AVGCEILU || Opcode == ISD::AVGCEILS;
+ bool IsSigned = Opcode == ISD::AVGFLOORS || Opcode == ISD::AVGCEILS;
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth + 1);
- Known2 = Known2.zext(BitWidth + 1);
- KnownBits One = KnownBits::makeConstant(APInt(1, 1));
- Known = KnownBits::computeForAddCarry(Known, Known2, One);
+ Known = IsSigned ? Known.sext(BitWidth + 1) : Known.zext(BitWidth + 1);
+ Known2 = IsSigned ? Known2.sext(BitWidth + 1) : Known2.zext(BitWidth + 1);
+ KnownBits Carry = KnownBits::makeConstant(APInt(1, IsCeil ? 1 : 0));
+ Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
Known = Known.extractBits(BitWidth, 1);
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2290223a06f8ef..2a2f4e20efff02 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24387,6 +24387,19 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+ case AArch64ISD::BICi: {
+ // Ask SimplifyDemandedBits to fold the node with every bit of every lane
+ // demanded. The DAGCombinerInfo overload commits the replacement itself,
+ // so this does not rely on the simplified node (TLO.Old) being N.
+ EVT VT = N->getValueType(0);
+ APInt DemandedBits = APInt::getAllOnes(VT.getScalarSizeInBits());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedBits, DemandedElts,
+ DCI))
+ return SDValue(N, 0);
+ break;
+ }
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -27427,6 +27440,23 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
+ case AArch64ISD::BICi: {
+ // BICi computes Op0 & ~(Imm << Shift), with Imm/Shift in operands 1 and 2.
+ // If every bit it clears is already known zero in Op0, fold to Op0.
+ SDValue Op0 = Op.getOperand(0);
+ KnownBits KnownOp0 =
+ TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
+ // Build the mask at the element width so that inverting it below cannot
+ // set bits above the element width (as ~uint64_t would).
+ APInt BitsToClear(Known.getBitWidth(), Op->getConstantOperandVal(1)
+ << Op->getConstantOperandVal(2));
+ if (BitsToClear.isSubsetOf(KnownOp0.Zero))
+ return TLO.CombineTo(Op, Op0);
+ // Otherwise report Op0's known bits with the cleared bits forced to zero,
+ // rather than and-ing into whatever Known held on entry.
+ Known = KnownOp0 & KnownBits::makeConstant(~BitsToClear);
+ return false;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index 017f382774892c..f36b8440fe4bfb 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
More information about the llvm-commits mailing list