[llvm] [AArch64] Move BSL generation to lowering. (PR #151855)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 3 00:59:27 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
It is generally better to run the target-independent combines before lowering to AArch64-specific nodes (provided they do not interfere with it). This moves the generation of BSL nodes into lowering, rather than a combine, so that the intermediate nodes are more likely to be optimized. There is a small change in the constant handling to detect legalized build-vector arguments correctly.
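For illustration, the simplest shape this targets is an OR of two ANDs with complementary constant masks. A minimal LLVM IR sketch (mine, not taken from the patch's test files) of a pattern that lowering can now turn into a single BSL-family instruction (bsl/bif/bit):

```llvm
; (or (and a, C), (and b, ~C)) keeps the bits of %a where C is set and the
; bits of %b where C is clear; with complementary splat masks this maps to
; one bif/bsl on AArch64.
define <4 x i32> @or_and_const_mask(<4 x i32> %a, <4 x i32> %b) {
  %lo = and <4 x i32> %a, <i32 255, i32 255, i32 255, i32 255>
  %hi = and <4 x i32> %b, <i32 -256, i32 -256, i32 -256, i32 -256>
  %r = or <4 x i32> %lo, %hi
  ret <4 x i32> %r
}
```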
---
Patch is 24.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151855.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+97-107)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll (+2-10)
- (modified) llvm/test/CodeGen/AArch64/combine-sdiv.ll (+34-65)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+53-20)
- (modified) llvm/test/CodeGen/AArch64/urem-vector-lkk.ll (+2-5)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2b6ea86ee1af5..aaa65a73e7b22 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14737,12 +14737,109 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
return ResultSLI;
}
+static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ if (VT.isScalableVector() && !Subtarget.hasSVE2())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ // InstCombine does (not (neg a)) => (add a -1).
+ // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
+ // Loop over all combinations of AND operands.
+ for (int i = 1; i >= 0; --i) {
+ for (int j = 1; j >= 0; --j) {
+ SDValue O0 = N0->getOperand(i);
+ SDValue O1 = N1->getOperand(j);
+ SDValue Sub, Add, SubSibling, AddSibling;
+
+ // Find a SUB and an ADD operand, one from each AND.
+ if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
+ Sub = O0;
+ Add = O1;
+ SubSibling = N0->getOperand(1 - i);
+ AddSibling = N1->getOperand(1 - j);
+ } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
+ Add = O0;
+ Sub = O1;
+ AddSibling = N0->getOperand(1 - i);
+ SubSibling = N1->getOperand(1 - j);
+ } else
+ continue;
+
+ if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
+ continue;
+
+ // The all-ones constant is always the right-hand operand of the Add.
+ if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
+ continue;
+
+ if (Sub.getOperand(1) != Add.getOperand(0))
+ continue;
+
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
+ }
+ }
+
+ // (or (and a b) (and (not a) c)) => (bsl a b c)
+ // We only have to look for constant vectors here since the general, variable
+ // case can be handled in TableGen.
+ unsigned Bits = VT.getScalarSizeInBits();
+ for (int i = 1; i >= 0; --i)
+ for (int j = 1; j >= 0; --j) {
+ APInt Val1, Val2;
+
+ if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
+ ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
+ ~Val1.trunc(Bits) == Val2.trunc(Bits)) {
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
+ N0->getOperand(1 - i), N1->getOperand(1 - j));
+ }
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
+ if (!BVN0 || !BVN1)
+ continue;
+
+ bool FoundMatch = true;
+ for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
+ ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
+ if (!CN0 || !CN1 ||
+ CN0->getAPIntValue().trunc(Bits) !=
+ ~CN1->getAsAPIntVal().trunc(Bits)) {
+ FoundMatch = false;
+ break;
+ }
+ }
+ if (FoundMatch)
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
+ N0->getOperand(1 - i), N1->getOperand(1 - j));
+ }
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
!Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
+ if (SDValue Res = tryLowerToBSL(Op, DAG))
+ return Res;
+
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
@@ -19414,106 +19511,6 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
return FixConv;
}
-static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- const AArch64TargetLowering &TLI) {
- EVT VT = N->getValueType(0);
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-
- if (!VT.isVector())
- return SDValue();
-
- if (VT.isScalableVector() && !Subtarget.hasSVE2())
- return SDValue();
-
- if (VT.isFixedLengthVector() &&
- (!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
- return SDValue();
-
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() != ISD::AND)
- return SDValue();
-
- // InstCombine does (not (neg a)) => (add a -1).
- // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
- // Loop over all combinations of AND operands.
- for (int i = 1; i >= 0; --i) {
- for (int j = 1; j >= 0; --j) {
- SDValue O0 = N0->getOperand(i);
- SDValue O1 = N1->getOperand(j);
- SDValue Sub, Add, SubSibling, AddSibling;
-
- // Find a SUB and an ADD operand, one from each AND.
- if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
- Sub = O0;
- Add = O1;
- SubSibling = N0->getOperand(1 - i);
- AddSibling = N1->getOperand(1 - j);
- } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
- Add = O0;
- Sub = O1;
- AddSibling = N0->getOperand(1 - i);
- SubSibling = N1->getOperand(1 - j);
- } else
- continue;
-
- if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
- continue;
-
- // Constant ones is always righthand operand of the Add.
- if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
- continue;
-
- if (Sub.getOperand(1) != Add.getOperand(0))
- continue;
-
- return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
- }
- }
-
- // (or (and a b) (and (not a) c)) => (bsl a b c)
- // We only have to look for constant vectors here since the general, variable
- // case can be handled in TableGen.
- unsigned Bits = VT.getScalarSizeInBits();
- uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
- for (int i = 1; i >= 0; --i)
- for (int j = 1; j >= 0; --j) {
- APInt Val1, Val2;
-
- if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
- ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
- (BitMask & ~Val1.getZExtValue()) == Val2.getZExtValue()) {
- return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
- N0->getOperand(1 - i), N1->getOperand(1 - j));
- }
- BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
- if (!BVN0 || !BVN1)
- continue;
-
- bool FoundMatch = true;
- for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
- ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
- ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
- if (!CN0 || !CN1 ||
- CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
- FoundMatch = false;
- break;
- }
- }
- if (FoundMatch)
- return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
- N0->getOperand(1 - i), N1->getOperand(1 - j));
- }
-
- return SDValue();
-}
-
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
// convert to csel(ccmp(.., cc0)), depending on cc1:
@@ -19595,17 +19592,10 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget,
const AArch64TargetLowering &TLI) {
SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
- return Res;
-
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index b681e3b223117..7872c027aff2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -155,16 +155,12 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; SDAG: // %bb.0:
; SDAG-NEXT: movi v1.16b, #171
; SDAG-NEXT: adrp x8, .LCPI4_0
-; SDAG-NEXT: adrp x9, .LCPI4_1
-; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
-; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
; SDAG-NEXT: ushr v1.16b, v1.16b, #7
-; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
-; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
; SDAG-NEXT: ret
;
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
@@ -192,7 +188,6 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SDAG-LABEL: pr38477:
; SDAG: // %bb.0:
; SDAG-NEXT: adrp x8, .LCPI5_0
-; SDAG-NEXT: adrp x9, .LCPI5_4
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; SDAG-NEXT: adrp x8, .LCPI5_1
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
@@ -203,16 +198,13 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
-; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
-; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
; SDAG-NEXT: adrp x8, .LCPI5_3
; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
-; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
-; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
; SDAG-NEXT: ret
;
; GISEL-LABEL: pr38477:
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index e1ba0e98a6c01..ebf81b67b77e7 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -230,14 +230,11 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT: adrp x8, .LCPI14_1
-; CHECK-SD-NEXT: movi v4.2d, #0xffffffffffffff00
; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
@@ -265,21 +262,17 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI15_1
+; CHECK-SD-NEXT: adrp x8, .LCPI15_0
; CHECK-SD-NEXT: cmlt v1.8h, v0.8h, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI15_3
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI15_1
+; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-SD-NEXT: adrp x8, .LCPI15_2
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI15_3]
-; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI15_0
; CHECK-SD-NEXT: add v1.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: sshl v1.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
@@ -308,28 +301,22 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI16_1
+; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: cmlt v2.8h, v0.8h, #0
; CHECK-SD-NEXT: cmlt v3.8h, v1.8h, #0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_2
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI16_1
; CHECK-SD-NEXT: ushl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT: ushl v3.8h, v3.8h, v4.8h
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_0
-; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI16_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_3
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT: adrp x8, .LCPI16_2
; CHECK-SD-NEXT: add v2.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: add v3.8h, v1.8h, v3.8h
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v5.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v5.16b
; CHECK-SD-NEXT: sshl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT: sshl v3.8h, v3.8h, v4.8h
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_3]
-; CHECK-SD-NEXT: and v2.16b, v2.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v3.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: bif v1.16b, v3.16b, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
@@ -363,42 +350,32 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI17_1
+; CHECK-SD-NEXT: adrp x8, .LCPI17_0
; CHECK-SD-NEXT: cmlt v4.8h, v0.8h, #0
; CHECK-SD-NEXT: cmlt v5.8h, v1.8h, #0
; CHECK-SD-NEXT: cmlt v7.8h, v2.8h, #0
; CHECK-SD-NEXT: cmlt v16.8h, v3.8h, #0
-; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI17_2
+; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI17_1
; CHECK-SD-NEXT: ushl v4.8h, v4.8h, v6.8h
; CHECK-SD-NEXT: ushl v5.8h, v5.8h, v6.8h
; CHECK-SD-NEXT: ushl v7.8h, v7.8h, v6.8h
; CHECK-SD-NEXT: ushl v6.8h, v16.8h, v6.8h
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI17_0
+; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT: adrp x8, .LCPI17_2
; CHECK-SD-NEXT: add v4.8h, v0.8h, v4.8h
; CHECK-SD-NEXT: add v5.8h, v1.8h, v5.8h
-; CHECK-SD-NEXT: ldr q17, [x8, :lo12:.LCPI17_0]
; CHECK-SD-NEXT: add v7.8h, v2.8h, v7.8h
; CHECK-SD-NEXT: add v6.8h, v3.8h, v6.8h
-; CHECK-SD-NEXT: adrp x8, .LCPI17_3
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v17.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v17.16b
-; CHECK-SD-NEXT: and v2.16b, v2.16b, v17.16b
; CHECK-SD-NEXT: sshl v4.8h, v4.8h, v16.8h
; CHECK-SD-NEXT: sshl v5.8h, v5.8h, v16.8h
-; CHECK-SD-NEXT: and v3.16b, v3.16b, v17.16b
; CHECK-SD-NEXT: sshl v7.8h, v7.8h, v16.8h
; CHECK-SD-NEXT: sshl v6.8h, v6.8h, v16.8h
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_3]
-; CHECK-SD-NEXT: and v4.16b, v4.16b, v16.16b
-; CHECK-SD-NEXT: and v5.16b, v5.16b, v16.16b
-; CHECK-SD-NEXT: and v7.16b, v7.16b, v16.16b
-; CHECK-SD-NEXT: and v6.16b, v6.16b, v16.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v4.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v5.16b
-; CHECK-SD-NEXT: orr v2.16b, v2.16b, v7.16b
-; CHECK-SD-NEXT: orr v3.16b, v3.16b, v6.16b
+; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
+; CHECK-SD-NEXT: bif v0.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT: bif v1.16b, v5.16b, v16.16b
+; CHECK-SD-NEXT: bif v2.16b, v7.16b, v16.16b
+; CHECK-SD-NEXT: bif v3.16b, v6.16b, v16.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
@@ -904,29 +881,21 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-SD-LABEL: non_splat_minus_one_divisor_1:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI26_1
+; CHECK-SD-NEXT: adrp x8, .LCPI26_0
; CHECK-SD-NEXT: cmlt v1.16b, v0.16b, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI26_3
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI26_1
+; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
; CHECK-SD-NEXT: adrp x8, .LCPI26_2
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_3]
-; CHECK-SD-NEXT: adrp x9, .LCPI26_5
-; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI26_0
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI26_4
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_4]
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_5]
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
+; CHECK-SD-NEXT: adrp x8, .LCPI26_3
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_3]
; CHECK-SD-NEXT: neg v1.16b, v0.16b
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 7e6f3548bdaf5..0c84468f3934b 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -884,8 +884,10 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
; CHECK-SD-LABEL: bsl2xi32_const:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0x000000ffffffff
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bsl2xi32_const:
@@ -923,8 +925,10 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: bsl1xi64_const:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0xffffffffffffff00
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bsl1xi64_const:
@@ -981,12 +985,17 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
}
def...
[truncated]
``````````
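The other pattern the moved code matches is the negate/decrement form called out in its comments. A hedged sketch (function and value names are my own) of the input it looks for:

```llvm
; After InstCombine, not(neg %a) appears as (add %a, -1), so the lowering
; matches (or (and (sub 0, a), b), (and (add a, -1), c)) => bsl(neg a, b, c).
define <4 x i32> @or_and_neg_mask(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %neg = sub <4 x i32> zeroinitializer, %a
  %m1 = and <4 x i32> %neg, %b
  %notneg = add <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %m2 = and <4 x i32> %notneg, %c
  %r = or <4 x i32> %m1, %m2
  ret <4 x i32> %r
}
```

This works because ~(0 - a) == a - 1, which is why the code checks for a SUB from an all-zeros splat paired with an ADD of an all-ones splat rather than an explicit NOT.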
https://github.com/llvm/llvm-project/pull/151855
More information about the llvm-commits mailing list