[llvm] 990c4bc - [AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for bit-twiddling. (#83514)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 10 03:08:40 PDT 2024
Author: Dinar Temirbulatov
Date: 2024-04-10T11:07:59+01:00
New Revision: 990c4bc95f69afb63849898b6b25b13d49d6cdd4
URL: https://github.com/llvm/llvm-project/commit/990c4bc95f69afb63849898b6b25b13d49d6cdd4
DIFF: https://github.com/llvm/llvm-project/commit/990c4bc95f69afb63849898b6b25b13d49d6cdd4.diff
LOG: [AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for bit-twiddling. (#83514)
Allow to fold or/and-and to BSL instuction for scalable vectors.
Added:
llvm/test/CodeGen/AArch64/sve2-bsl.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 819e8ccd5c33f0..744b2cdef504d5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17930,16 +17930,14 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!VT.isVector())
return SDValue();
- // The combining code currently only works for NEON vectors. In particular,
- // it does not work for SVE when dealing with vectors wider than 128 bits.
- // It also doesn't work for streaming mode because it causes generating
- // bsl instructions that are invalid in streaming mode.
- if (TLI.useSVEForFixedLengthVectorVT(
- VT, !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable()))
+ // The combining code works for NEON, SVE2 and SME.
+ if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
+ (VT.isScalableVector() && !Subtarget.hasSVE2()))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -17994,6 +17992,14 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
+ APInt Val1, Val2;
+
+ if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
+ ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
+ (BitMask & ~Val1.getZExtValue()) == Val2.getZExtValue()) {
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
+ N0->getOperand(1 - i), N1->getOperand(1 - j));
+ }
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
@@ -18009,9 +18015,8 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
break;
}
}
-
if (FoundMatch)
- return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
new file mode 100644
index 00000000000000..11f67634a3fb2c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: bsl:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.s, #0x7fffffff
+; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
+ %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
+ %c = or <vscale x 4 x i32> %1, %2
+ ret <vscale x 4 x i32> %c
+}
+
+; we are not expecting bsl instruction here. the constants do not match to fold to bsl.
+define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: no_bsl_fold:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT: and z1.s, z1.s, #0x7ffffffe
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
+ %2 = and <vscale x 4 x i32> %b, splat(i32 2147483646)
+ %c = or <vscale x 4 x i32> %1, %2
+ ret <vscale x 4 x i32> %c
+}
More information about the llvm-commits
mailing list