[llvm] 8193f0d - [ARM] Add tablegen patterns for bf16 vrev

Sun Oct 2 05:42:23 PDT 2022

Author: David Green
Date: 2022-10-02T13:42:14+01:00
New Revision: 8193f0d1d26b7b4d602dcd5581b40382e4b6cf36

URL: https://github.com/llvm/llvm-project/commit/8193f0d1d26b7b4d602dcd5581b40382e4b6cf36
DIFF: https://github.com/llvm/llvm-project/commit/8193f0d1d26b7b4d602dcd5581b40382e4b6cf36.diff

LOG: [ARM] Add tablegen patterns for bf16 vrev

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMInstrNEON.td
    llvm/test/CodeGen/ARM/bf16-shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 1f4d16a67055..d9605d05b883 100644

--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6982,6 +6982,10 @@ let Predicates = [HasNEON] in {
             (VREV64q16 QPR:$Vm)>;
   def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
             (VREV64d16 DPR:$Vm)>;
+  def : Pat<(v8bf16 (ARMvrev64 (v8bf16 QPR:$Vm))),
+            (VREV64q16 QPR:$Vm)>;
+  def : Pat<(v4bf16 (ARMvrev64 (v4bf16 DPR:$Vm))),
+            (VREV64d16 DPR:$Vm)>;
 }
 
 //   VREV32   : Vector Reverse elements within 32-bit words
@@ -7008,6 +7012,10 @@ let Predicates = [HasNEON] in {
             (VREV32q16 QPR:$Vm)>;
   def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
             (VREV32d16 DPR:$Vm)>;
+  def : Pat<(v8bf16 (ARMvrev32 (v8bf16 QPR:$Vm))),
+            (VREV32q16 QPR:$Vm)>;
+  def : Pat<(v4bf16 (ARMvrev32 (v4bf16 DPR:$Vm))),
+            (VREV32d16 DPR:$Vm)>;
 }
 
 //   VREV16   : Vector Reverse elements within 16-bit halfwords

diff  --git a/llvm/test/CodeGen/ARM/bf16-shuffle.ll b/llvm/test/CodeGen/ARM/bf16-shuffle.ll
index 726eb75332c3..08698e2f79c3 100644
--- a/llvm/test/CodeGen/ARM/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/ARM/bf16-shuffle.ll
@@ -233,17 +233,45 @@ entry:
   ret <8 x bfloat> %vext
 }
 
-;define dso_local <4 x bfloat> @test_vrev64_bf16(<4 x bfloat> %a) {
-;entry:
-;  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-;  ret <4 x bfloat> %shuffle.i
-;}
+define dso_local <4 x bfloat> @test_vrev64_bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: test_vrev64_bf16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrev64.16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x bfloat> %shuffle.i
+}
 
-;define dso_local <8 x bfloat> @test_vrev64q_bf16(<8 x bfloat> %a) {
-;entry:
-;  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-;  ret <8 x bfloat> %shuffle.i
-;}
+define dso_local <8 x bfloat> @test_vrev64q_bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: test_vrev64q_bf16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrev64.16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x bfloat> %shuffle.i
+}
+
+define dso_local <4 x bfloat> @test_vrev32_bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: test_vrev32_bf16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrev32.16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x bfloat> %shuffle.i
+}
+
+define dso_local <8 x bfloat> @test_vrev32q_bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: test_vrev32q_bf16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrev32.16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x bfloat> %shuffle.i
+}
 
 define <4 x bfloat> @test_vld_dup1_4xbfloat(bfloat* %b) {
 ; CHECK-LABEL: test_vld_dup1_4xbfloat: