[llvm] [AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (PR #88413)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 09:41:24 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Dinar Temirbulatov (dtemirbulatov)
Changes:
Allow folding of the or(and, and) pattern into the BSL instruction for scalable vectors.
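For context, a minimal sketch of the pattern this combine now recognizes for scalable vectors (function and value names are illustrative, not from the patch). Because `c - 1 == ~(0 - c)` in two's complement, the two AND masks are bitwise complements of each other, so the OR is a bit select on the mask `0 - c`:

```llvm
; Illustrative only: the negated-condition form of a vector bit select.
target triple = "aarch64"

define <vscale x 4 x i32> @bsl_sketch(<vscale x 4 x i32> %c, <vscale x 4 x i32> %l, <vscale x 4 x i32> %r) #0 {
  %mask     = sub <vscale x 4 x i32> zeroinitializer, %c  ; 0 - c
  %not_mask = add <vscale x 4 x i32> %c, splat(i32 -1)    ; c - 1 == ~(0 - c)
  %l_bits = and <vscale x 4 x i32> %mask, %l
  %r_bits = and <vscale x 4 x i32> %not_mask, %r
  %sel    = or <vscale x 4 x i32> %l_bits, %r_bits        ; select on %mask
  ret <vscale x 4 x i32> %sel
}

attributes #0 = { "target-features"="+sve2" }
```

With SVE2 enabled, this sequence now lowers to a single `bsl` plus the `subr` that materializes the mask, as the new tests below check.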
---
Full diff: https://github.com/llvm/llvm-project/pull/88413.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4-2)
- (added) llvm/test/CodeGen/AArch64/sve2-bitselect.ll (+254)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80181a77c9d238..d9aabb64125a4f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17942,11 +17942,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
     } else
       continue;
 
-    if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
+    if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()) &&
+        !ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
       continue;
 
     // Constant ones is always righthand operand of the Add.
-    if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
+    if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()) &&
+        !ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
       continue;
 
     if (Sub.getOperand(1) != Add.getOperand(0))
diff --git a/llvm/test/CodeGen/AArch64/sve2-bitselect.ll b/llvm/test/CodeGen/AArch64/sve2-bitselect.ll
new file mode 100644
index 00000000000000..9ceeffc2e5d2ab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-bitselect.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; Check that an expanded vbsl(vneg(pre_cond), left, right) lowers to a BSL
+; during ISEL.
+;
+; Subtly different from a plain vector bit select: the operand representing
+; the condition has been negated (-v, not to be confused with bitwise_not(v)).
+
+; Each vbsl_neg_cond_xxxx tests one of the 16 permutations of the operands.
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0000:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+ %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+ %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
+ ret <vscale x 4 x i32> %bsl0000
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0001:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+ %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+ %bsl0001 = or <vscale x 4 x i32> %right_bits_0, %left_bits_1
+ ret <vscale x 4 x i32> %bsl0001
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0010:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+ %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+ %bsl0010 = or <vscale x 4 x i32> %right_bits_1, %left_bits_0
+ ret <vscale x 4 x i32> %bsl0010
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0011:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+ %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+ %bsl0011 = or <vscale x 4 x i32> %right_bits_1, %left_bits_1
+ ret <vscale x 4 x i32> %bsl0011
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0100:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+ %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+ %bsl0100 = or <vscale x 4 x i32> %left_bits_0, %right_bits_0
+ ret <vscale x 4 x i32> %bsl0100
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0101:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+ %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+ %bsl0101 = or <vscale x 4 x i32> %left_bits_0, %right_bits_1
+ ret <vscale x 4 x i32> %bsl0101
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0110:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+ %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+ %bsl0110 = or <vscale x 4 x i32> %left_bits_1, %right_bits_0
+ ret <vscale x 4 x i32> %bsl0110
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0111:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+ %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+ %bsl0111 = or <vscale x 4 x i32> %left_bits_1, %right_bits_1
+ ret <vscale x 4 x i32> %bsl0111
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1000:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+ %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+ %bsl1000 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_0
+ ret <vscale x 4 x i32> %bsl1000
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1001:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+ %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+ %bsl1001 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_1
+ ret <vscale x 4 x i32> %bsl1001
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1010:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+ %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+ %bsl1010 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_0
+ ret <vscale x 4 x i32> %bsl1010
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1011:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+ %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+ %bsl1011 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_1
+ ret <vscale x 4 x i32> %bsl1011
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1100:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+ %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+ %bsl1100 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_0
+ ret <vscale x 4 x i32> %bsl1100
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1101:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+ %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+ %bsl1101 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_1
+ ret <vscale x 4 x i32> %bsl1101
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1110:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+ %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+ %bsl1110 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_0
+ ret <vscale x 4 x i32> %bsl1110
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1111:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+ %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+ %bsl1111 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_1
+ ret <vscale x 4 x i32> %bsl1111
+}
+
+attributes #0 = { "target-features"="+sve2" }
``````````
https://github.com/llvm/llvm-project/pull/88413