[llvm] d7105e7 - Teach the AArch64 backend to instruction select the BCAX instruction.

Wed Feb 23 15:59:46 PST 2022

Author: Owen Anderson
Date: 2022-02-23T15:59:40-08:00
New Revision: d7105e76319c992fcbcf4e5e174c06534b061fb7

URL: https://github.com/llvm/llvm-project/commit/d7105e76319c992fcbcf4e5e174c06534b061fb7
DIFF: https://github.com/llvm/llvm-project/commit/d7105e76319c992fcbcf4e5e174c06534b061fb7.diff

LOG: Teach the AArch64 backend to instruction select the BCAX instruction.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D120112

Added: 
    llvm/test/CodeGen/AArch64/bcax.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f3aff92d4bac5..8e1f61925794f 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1024,6 +1024,15 @@ def : EOR3_pattern<v8i16>;
 def : EOR3_pattern<v4i32>;
 def : EOR3_pattern<v2i64>;
 
+class BCAX_pattern<ValueType VecTy>
+  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
+        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
+
+def : BCAX_pattern<v16i8>;
+def : BCAX_pattern<v8i16>;
+def : BCAX_pattern<v4i32>;
+def : BCAX_pattern<v2i64>;
+
 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;

diff  --git a/llvm/test/CodeGen/AArch64/bcax.ll b/llvm/test/CodeGen/AArch64/bcax.ll
new file mode 100644
index 0000000000000..15e8b3421bf9b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bcax.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+
+define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
+; SHA3-LABEL: bcax_64x2:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    bcax v0.16b, v2.16b, v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: bcax_64x2:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
+; NOSHA3-NEXT:    ret
+  %4 = xor <2 x i64> %1, <i64 -1, i64 -1>
+  %5 = and <2 x i64> %4, %0
+  %6 = xor <2 x i64> %5, %2
+  ret <2 x i64> %6
+}
+
+define <4 x i32> @bcax_32x4(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
+; SHA3-LABEL: bcax_32x4:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    bcax v0.16b, v2.16b, v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: bcax_32x4:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
+; NOSHA3-NEXT:    ret
+  %4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %5 = and <4 x i32> %4, %0
+  %6 = xor <4 x i32> %5, %2
+  ret <4 x i32> %6
+}
+
+define <8 x i16> @bcax_16x8(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
+; SHA3-LABEL: bcax_16x8:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    bcax v0.16b, v2.16b, v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: bcax_16x8:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
+; NOSHA3-NEXT:    ret
+  %4 = xor <8 x i16> %1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %5 = and <8 x i16> %4, %0
+  %6 = xor <8 x i16> %5, %2
+  ret <8 x i16> %6
+}
+
+define <16 x i8> @bcax_8x16(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
+; SHA3-LABEL: bcax_8x16:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    bcax v0.16b, v2.16b, v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: bcax_8x16:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    bic v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
+; NOSHA3-NEXT:    ret
+  %4 = xor <16 x i8> %1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %5 = and <16 x i8> %4, %0
+  %6 = xor <16 x i8> %5, %2
+  ret <16 x i8> %6
+}