[llvm] d9d71bd - [AArch64] Move BSL generation to lowering. (#151855)
Author: David Green
Date: 2025-08-21T09:54:42+01:00
New Revision: d9d71bdc14c510a258f9ad61be9ad4a3db735f49
URL: https://github.com/llvm/llvm-project/commit/d9d71bdc14c510a258f9ad61be9ad4a3db735f49
DIFF: https://github.com/llvm/llvm-project/commit/d9d71bdc14c510a258f9ad61be9ad4a3db735f49.diff
LOG: [AArch64] Move BSL generation to lowering. (#151855)
It is generally better to let the target-independent combines run before
creating AArch64-specific nodes (provided they don't interfere with them).
This moves the generation of BSL nodes into lowering, rather than a combine,
so that intermediate nodes are more likely to be optimized. There is a small
change in the constant handling to detect legalized build_vector arguments
correctly.
Fixes #149380, though not directly; #151856 contained a direct fix for
expanding the pseudos.
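For reference, the pattern being formed is the bitwise-select idiom
(or (and X, C), (and Y, ~C)) with complementary constant masks, which maps to
a single BSL/BIT/BIF instruction. A minimal illustration of the kind of IR
affected, adapted from the neon-bitwise-instructions.ll tests below (the
function name and mask are illustrative):

  define <8 x i8> @bsl_example(<8 x i8> %a, <8 x i8> %b) {
    ; Complementary masks: take a byte from %a where the mask byte is set,
    ; and from %b where it is clear.
    %t1 = and <8 x i8> %a, <i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0>
    %t2 = and <8 x i8> %b, <i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1>
    %r = or <8 x i8> %t1, %t2
    ret <8 x i8> %r
  }

With this change the or is lowered directly to AArch64ISD::BSP, which selects
to something like:

  movi d2, #0x00ff00ff00ff00ff
  bif  v0.8b, v1.8b, v2.8b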
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
llvm/test/CodeGen/AArch64/combine-sdiv.ll
llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d168cc8d1bd06..f6b214078f581 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1121,7 +1121,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
- // We combine OR nodes for bitfield operations.
+ // We combine OR nodes for ccmp operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
@@ -14799,23 +14799,15 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
return ResultSLI;
}
-static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- const AArch64TargetLowering &TLI) {
+static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- SelectionDAG &DAG = DCI.DAG;
+ assert(VT.isVector() && "Expected vector type in tryLowerToBSL\n");
SDLoc DL(N);
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
- if (!VT.isVector())
- return SDValue();
-
if (VT.isScalableVector() && !Subtarget.hasSVE2())
return SDValue();
- if (VT.isFixedLengthVector() &&
- (!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
- return SDValue();
-
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
@@ -14865,14 +14857,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
- uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
APInt Val1, Val2;
if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
- (BitMask & ~Val1.getZExtValue()) == Val2.getZExtValue()) {
+ ~Val1.trunc(Bits) == Val2.trunc(Bits)) {
return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
@@ -14886,7 +14877,8 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
- CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
+ CN0->getAPIntValue().trunc(Bits) !=
+ ~CN1->getAsAPIntVal().trunc(Bits)) {
FoundMatch = false;
break;
}
@@ -14905,6 +14897,9 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
!Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
+ if (SDValue Res = tryLowerToBSL(Op, DAG))
+ return Res;
+
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
@@ -19658,17 +19653,10 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget,
const AArch64TargetLowering &TLI) {
SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
- return Res;
-
return SDValue();
}
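The constant handling above is the "small change" mentioned in the log. After
type legalization, the scalar operands of a build_vector (and the splat value
returned by ISD::isConstantSplatVector) may be wider than the element type,
e.g. i16 elements carried as i32 constants. The old code masked only one side
of the comparison with a uint64_t BitMask, so a sign-extended constant such as
i32 0xffffffff for an i16 -1 could fail the complement check. Truncating both
APInts to the element width compares just the meaningful bits. A minimal
standalone sketch of the new comparison (isComplementarySplat is a
hypothetical helper, not part of the patch):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  // True if Val1 and Val2 are bitwise complements within the low Bits bits,
  // ignoring any wider bits a legalized constant may carry.
  static bool isComplementarySplat(const APInt &Val1, const APInt &Val2,
                                   unsigned Bits) {
    return ~Val1.trunc(Bits) == Val2.trunc(Bits);
  }

For example, with Bits == 16, Val1 == i32 0x00000000 and Val2 == i32
0xffffffff truncate to 0x0000 and 0xffff and match, whereas the old
(BitMask & ~Val1) == Val2 comparison on the zero-extended values did not.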
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index b681e3b223117..7872c027aff2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -155,16 +155,12 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; SDAG: // %bb.0:
; SDAG-NEXT: movi v1.16b, #171
; SDAG-NEXT: adrp x8, .LCPI4_0
-; SDAG-NEXT: adrp x9, .LCPI4_1
-; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
-; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
; SDAG-NEXT: ushr v1.16b, v1.16b, #7
-; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
-; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
; SDAG-NEXT: ret
;
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
@@ -192,7 +188,6 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SDAG-LABEL: pr38477:
; SDAG: // %bb.0:
; SDAG-NEXT: adrp x8, .LCPI5_0
-; SDAG-NEXT: adrp x9, .LCPI5_4
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; SDAG-NEXT: adrp x8, .LCPI5_1
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
@@ -203,16 +198,13 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
-; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
-; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
; SDAG-NEXT: adrp x8, .LCPI5_3
; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
-; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
-; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
; SDAG-NEXT: ret
;
; GISEL-LABEL: pr38477:
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index 6208a697cab11..9d0ade2480428 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -230,14 +230,11 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT: adrp x8, .LCPI14_1
-; CHECK-SD-NEXT: movi v4.2d, #0xffffffffffffff00
; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
@@ -265,21 +262,17 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI15_1
+; CHECK-SD-NEXT: adrp x8, .LCPI15_0
; CHECK-SD-NEXT: cmlt v1.8h, v0.8h, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI15_3
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI15_1
+; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-SD-NEXT: adrp x8, .LCPI15_2
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI15_3]
-; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI15_0
; CHECK-SD-NEXT: add v1.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: sshl v1.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
@@ -308,28 +301,22 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI16_1
+; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: cmlt v2.8h, v0.8h, #0
; CHECK-SD-NEXT: cmlt v3.8h, v1.8h, #0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_2
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI16_1
; CHECK-SD-NEXT: ushl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT: ushl v3.8h, v3.8h, v4.8h
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_0
-; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI16_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI16_3
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT: adrp x8, .LCPI16_2
; CHECK-SD-NEXT: add v2.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: add v3.8h, v1.8h, v3.8h
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v5.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v5.16b
; CHECK-SD-NEXT: sshl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT: sshl v3.8h, v3.8h, v4.8h
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_3]
-; CHECK-SD-NEXT: and v2.16b, v2.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v3.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: bif v1.16b, v3.16b, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
@@ -363,42 +350,32 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI17_1
+; CHECK-SD-NEXT: adrp x8, .LCPI17_0
; CHECK-SD-NEXT: cmlt v4.8h, v0.8h, #0
; CHECK-SD-NEXT: cmlt v5.8h, v1.8h, #0
; CHECK-SD-NEXT: cmlt v7.8h, v2.8h, #0
; CHECK-SD-NEXT: cmlt v16.8h, v3.8h, #0
-; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI17_2
+; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI17_1
; CHECK-SD-NEXT: ushl v4.8h, v4.8h, v6.8h
; CHECK-SD-NEXT: ushl v5.8h, v5.8h, v6.8h
; CHECK-SD-NEXT: ushl v7.8h, v7.8h, v6.8h
; CHECK-SD-NEXT: ushl v6.8h, v16.8h, v6.8h
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI17_0
+; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT: adrp x8, .LCPI17_2
; CHECK-SD-NEXT: add v4.8h, v0.8h, v4.8h
; CHECK-SD-NEXT: add v5.8h, v1.8h, v5.8h
-; CHECK-SD-NEXT: ldr q17, [x8, :lo12:.LCPI17_0]
; CHECK-SD-NEXT: add v7.8h, v2.8h, v7.8h
; CHECK-SD-NEXT: add v6.8h, v3.8h, v6.8h
-; CHECK-SD-NEXT: adrp x8, .LCPI17_3
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v17.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v17.16b
-; CHECK-SD-NEXT: and v2.16b, v2.16b, v17.16b
; CHECK-SD-NEXT: sshl v4.8h, v4.8h, v16.8h
; CHECK-SD-NEXT: sshl v5.8h, v5.8h, v16.8h
-; CHECK-SD-NEXT: and v3.16b, v3.16b, v17.16b
; CHECK-SD-NEXT: sshl v7.8h, v7.8h, v16.8h
; CHECK-SD-NEXT: sshl v6.8h, v6.8h, v16.8h
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_3]
-; CHECK-SD-NEXT: and v4.16b, v4.16b, v16.16b
-; CHECK-SD-NEXT: and v5.16b, v5.16b, v16.16b
-; CHECK-SD-NEXT: and v7.16b, v7.16b, v16.16b
-; CHECK-SD-NEXT: and v6.16b, v6.16b, v16.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v4.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v5.16b
-; CHECK-SD-NEXT: orr v2.16b, v2.16b, v7.16b
-; CHECK-SD-NEXT: orr v3.16b, v3.16b, v6.16b
+; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
+; CHECK-SD-NEXT: bif v0.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT: bif v1.16b, v5.16b, v16.16b
+; CHECK-SD-NEXT: bif v2.16b, v7.16b, v16.16b
+; CHECK-SD-NEXT: bif v3.16b, v6.16b, v16.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
@@ -904,29 +881,21 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-SD-LABEL: non_splat_minus_one_divisor_1:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI26_1
+; CHECK-SD-NEXT: adrp x8, .LCPI26_0
; CHECK-SD-NEXT: cmlt v1.16b, v0.16b, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI26_3
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT: adrp x8, .LCPI26_1
+; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
; CHECK-SD-NEXT: adrp x8, .LCPI26_2
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_3]
-; CHECK-SD-NEXT: adrp x9, .LCPI26_5
-; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI26_0
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI26_4
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_4]
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_5]
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
+; CHECK-SD-NEXT: adrp x8, .LCPI26_3
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_3]
; CHECK-SD-NEXT: neg v1.16b, v0.16b
-; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 7e6f3548bdaf5..0c84468f3934b 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -884,8 +884,10 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
; CHECK-SD-LABEL: bsl2xi32_const:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0x000000ffffffff
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bsl2xi32_const:
@@ -923,8 +925,10 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: bsl1xi64_const:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0xffffffffffffff00
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bsl1xi64_const:
@@ -981,12 +985,17 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
}
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: bsl2xi64_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI75_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI75_0]
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bsl2xi64_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bsl2xi64_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI75_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI75_0]
+; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
%tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
%tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
%tmp3 = or <2 x i64> %tmp1, %tmp2
@@ -1158,11 +1167,8 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) {
define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: vselect_constant_cond_v8i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0xffffffffff00ff00
-; CHECK-SD-NEXT: movi d3, #0x00000000ff00ff
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v3.8b
-; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: movi d2, #0x00000000ff00ff
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vselect_constant_cond_v8i8:
@@ -1180,11 +1186,8 @@ define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) {
define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-SD-LABEL: vselect_constant_cond_v4i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0x00ffffffff0000
-; CHECK-SD-NEXT: movi d3, #0xffff00000000ffff
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v3.8b
-; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: movi d2, #0xffff00000000ffff
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vselect_constant_cond_v4i16:
@@ -2839,3 +2842,33 @@ define <2 x i64> @orr64imm8h_lsl8(<2 x i64> %a) {
ret <2 x i64> %tmp1
}
+define <8 x i16> @pr149380(<4 x i16> %u1, <1 x i64> %u2, <8 x i16> %vqshlu_n169) {
+; CHECK-SD-LABEL: pr149380:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: orr v2.8h, #1
+; CHECK-SD-NEXT: sqadd v0.8h, v2.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pr149380:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: neg v1.8h, v2.8h
+; CHECK-GI-NEXT: movi v3.8h, #1
+; CHECK-GI-NEXT: neg v1.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: sqadd v0.8h, v3.8h, v0.8h
+; CHECK-GI-NEXT: ret
+entry:
+ %mul.i = mul <8 x i16> %vqshlu_n169, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %sub.i = sub <8 x i16> zeroinitializer, %mul.i
+ %vbsl3.i = and <8 x i16> %sub.i, %vqshlu_n169
+ %0 = add <8 x i16> %mul.i, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %vbsl4.i = and <8 x i16> %0, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >
+ %vbsl5.i = or <8 x i16> %vbsl3.i, %vbsl4.i
+ %vqaddq_v2.i26515 = tail call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16> %vbsl5.i)
+ ret <8 x i16> %vqaddq_v2.i26515
+}
diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
index 468a33ce5bfcf..4be8c3775c704 100644
--- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
@@ -88,7 +88,6 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: movi d4, #0x0000000000ffff
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1]
@@ -97,16 +96,14 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h
; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h
-; CHECK-NEXT: movi d3, #0xffffffffffff0000
+; CHECK-NEXT: movi d3, #0x0000000000ffff
; CHECK-NEXT: shrn v2.4h, v2.4s, #16
; CHECK-NEXT: add v1.4h, v2.4h, v1.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: and v2.8b, v0.8b, v4.8b
-; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
-; CHECK-NEXT: orr v1.8b, v2.8b, v1.8b
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3]
+; CHECK-NEXT: bit v1.8b, v0.8b, v3.8b
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
%1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>