[llvm] 37a92f9 - [AArch64][SVE2] SVE2 NBSL instruction lowering. (#89732)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 26 09:07:29 PDT 2024
Author: Dinar Temirbulatov
Date: 2024-04-26T17:07:25+01:00
New Revision: 37a92f9f60fc2f77264b06c5602a61aaa5196edb
URL: https://github.com/llvm/llvm-project/commit/37a92f9f60fc2f77264b06c5602a61aaa5196edb
DIFF: https://github.com/llvm/llvm-project/commit/37a92f9f60fc2f77264b06c5602a61aaa5196edb.diff
LOG: [AArch64][SVE2] SVE2 NBSL instruction lowering. (#89732)
Allow folding BSL/EOR instructions into the NBSL instruction for scalable
vectors.
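For context, SVE2 BSL performs a bitwise select and NBSL produces the bitwise complement of that result, so a select followed by a NOT can be emitted as a single NBSL. Below is a minimal scalar sketch of the semantics (not from the patch), assuming the usual operand convention that the third operand selects bits from the first source where set and from the second source where clear:

    #include <stdint.h>

    /* Scalar model of the bitwise-select semantics, per 64-bit lane. */
    static uint64_t bsl(uint64_t a, uint64_t b, uint64_t sel) {
      return (a & sel) | (b & ~sel);
    }

    /* NBSL is the bitwise NOT of the BSL result, which is exactly what the
       new AArch64nbsl PatFrag below matches: vnot(AArch64bsp a, b, sel). */
    static uint64_t nbsl(uint64_t a, uint64_t b, uint64_t sel) {
      return ~bsl(a, b, sel);
    }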
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve2-bsl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 17d96370c04a59..2159116d1ab7c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -746,6 +746,8 @@ def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
+def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+ (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3331ba7f99a161..b90ac0ff1fe00a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3760,7 +3760,7 @@ let Predicates = [HasSVE2orSME] in {
defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>;
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
- defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
+ defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl, AArch64nbsl>;
// SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 23b2622f5f5863..ef7d4abe5c5f4e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -41,3 +41,55 @@ define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32>
%c = or <vscale x 4 x i32> %1, %2
ret <vscale x 4 x i32> %c
}
+
+define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: nbsl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.b, #127 // =0x7f
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 16 x i8> %a, splat(i8 127)
+ %2 = and <vscale x 16 x i8> %b, splat(i8 -128)
+ %3 = or <vscale x 16 x i8> %1, %2
+ %4 = xor <vscale x 16 x i8> %3, splat(i8 -1)
+ ret <vscale x 16 x i8> %4
+}
+
+define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: nbsl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 8 x i16> %a, splat(i16 32767)
+ %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
+ %3 = or <vscale x 8 x i16> %1, %2
+ %4 = xor <vscale x 8 x i16> %3, splat(i16 -1)
+ ret <vscale x 8 x i16> %4
+}
+
+define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: nbsl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.s, #0x7fffffff
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
+ %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
+ %3 = or <vscale x 4 x i32> %1, %2
+ %4 = xor <vscale x 4 x i32> %3, splat(i32 -1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: nbsl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
+ %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
+ %3 = or <vscale x 2 x i64> %1, %2
+ %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
+ ret <vscale x 2 x i64> %4
+}
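As a sanity check on the new nbsl_i8 test above, the following standalone program (not part of the patch) exhaustively compares the unfolded and/or/xor sequence from the test IR with the scalar NBSL model for every pair of byte inputs:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of one byte lane: NBSL(a, b, sel) = ~((a & sel) | (b & ~sel)). */
    static uint8_t nbsl8(uint8_t a, uint8_t b, uint8_t sel) {
      return (uint8_t)~((a & sel) | (b & (uint8_t)~sel));
    }

    int main(void) {
      for (unsigned a = 0; a < 256; ++a) {
        for (unsigned b = 0; b < 256; ++b) {
          /* The and/or/xor sequence from nbsl_i8, applied to one byte lane. */
          uint8_t unfolded = (uint8_t)(((a & 0x7f) | (b & 0x80)) ^ 0xff);
          assert(unfolded == nbsl8((uint8_t)a, (uint8_t)b, 0x7f));
        }
      }
      puts("NBSL model matches the unfolded sequence for all i8 inputs");
      return 0;
    }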