[llvm] fb82b83 - [AArch64][SME] Support NEON scalar FP instructions in streaming mode

Mon Aug 23 02:24:31 PDT 2021

Author: Cullen Rhodes
Date: 2021-08-23T08:48:34Z
New Revision: fb82b836b738829a4f006f475723452cf6969280

URL: https://github.com/llvm/llvm-project/commit/fb82b836b738829a4f006f475723452cf6969280
DIFF: https://github.com/llvm/llvm-project/commit/fb82b836b738829a4f006f475723452cf6969280.diff

LOG: [AArch64][SME] Support NEON scalar FP instructions in streaming mode

The following scalar FP instructions are legal in streaming mode:

  0101 1110 xx1x xxxx 11x1 11xx xxxx xxxx # FMULX/FRECPS/FRSQRTS (scalar)
  0101 1110 x10x xxxx 00x1 11xx xxxx xxxx # FMULX/FRECPS/FRSQRTS (scalar, FP16)
  01x1 1110 1x10 0001 11x1 10xx xxxx xxxx # FRECPE/FRSQRTE/FRECPX (scalar)
  01x1 1110 1111 1001 11x1 10xx xxxx xxxx # FRECPE/FRSQRTE/FRECPX (scalar, FP16)

Predicate them on `HasNEONorStreamingSVE`. Full list of affected
instructions:

  FMULX16, FMULX32, FMULX64, FRECPS16, FRECPS32, FRECPS64, FRSQRTS16,
  FRSQRTS32, FRSQRTS64, FRECPEv1f16, FRECPEv1i32, FRECPEv1i64, FRECPXv1f16,
  FRECPXv1i32, FRECPXv1i64, FRSQRTEv1f16, FRSQRTEv1i32, FRSQRTEv1i64

Depends on D107902.

The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06/SIMD-FP-Instructions

Execution of NEON instructions that are illegal in streaming mode will
cause a trap or exception. Using FMULX [1] as an example, this check is
at the top of the pseudocode:

  if elements == 1 then
      CheckFPEnabled64();
  else
      CheckFPAdvSIMDEnabled64();

For the legal scalar variants it calls `CheckFPEnabled64`, whereas for the
illegal vector variants it calls `CheckFPAdvSIMDEnabled64`, which traps in
streaming mode.

Checking which of these two functions an instruction's pseudocode calls
is therefore a useful way to tell whether it is legal in streaming mode.

[1] https://developer.arm.com/documentation/ddi0602/2021-06/SIMD-FP-Instructions/FMULX--Floating-point-Multiply-extended-

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D108039

Added: 
    llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
    llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
    llvm/test/MC/AArch64/SME/streaming-mode-neon.s

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9bc2539e95f08..10c6fcd5cacd7 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6826,16 +6826,19 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
 }
 
 multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
-                             SDPatternOperator OpNode = null_frag> {
+                             SDPatternOperator OpNode = null_frag,
+                             Predicate pred = HasNEON> {
   let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+    let Predicates = [pred] in {
     def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
       [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
     def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
       [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
-    let Predicates = [HasNEON, HasFullFP16] in {
+    }
+    let Predicates = [pred, HasFullFP16] in {
     def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
       [(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]>;
-    } // Predicates = [HasNEON, HasFullFP16]
+    }
   }
 
   def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -7025,10 +7028,13 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
             (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
 }
 
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+                           Predicate pred = HasNEON> {
+  let Predicates = [pred] in {
   def v1i64       : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
   def v1i32       : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
-  let Predicates = [HasNEON, HasFullFP16] in {
+  }
+  let Predicates = [pred, HasFullFP16] in {
   def v1f16       : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
   }
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0145da8e347fa..79a35246615f2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4624,9 +4624,9 @@ defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
 defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
 defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
 defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
+defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
+defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
 defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -4725,9 +4725,9 @@ defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
 def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
 defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
 defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
+defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
+defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
+defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
 defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                  UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
 defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
new file mode 100644
index 0000000000000..9d838880c5609
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN:        | llvm-objdump --mattr=+fullfp16 -d - | FileCheck %s --check-prefix=CHECK-INST
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+streaming-sve,+fullfp16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Scalar FP instructions
+
+fmulx h0, h1, h2
+// CHECK-INST: fmulx h0, h1, h2
+// CHECK-ENCODING: [0x20,0x1c,0x42,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecps h0, h1, h2
+// CHECK-INST: frecps h0, h1, h2
+// CHECK-ENCODING: [0x20,0x3c,0x42,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frsqrts h0, h1, h2
+// CHECK-INST: frsqrts h0, h1, h2
+// CHECK-ENCODING: [0x20,0x3c,0xc2,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecpe h0, h1
+// CHECK-INST: frecpe h0, h1
+// CHECK-ENCODING: [0x20,0xd8,0xf9,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecpx h0, h1
+// CHECK-INST: frecpx h0, h1
+// CHECK-ENCODING: [0x20,0xf8,0xf9,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frsqrte h0, h1
+// CHECK-INST: frsqrte h0, h1
+// CHECK-ENCODING: [0x20,0xd8,0xf9,0x7e]
+// CHECK-ERROR: instruction requires: fullfp16

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
new file mode 100644
index 0000000000000..d700c9217611d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Check FABD is illegal in streaming mode
+
+fabd s0, s1, s2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: fabd s0, s1, s2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
new file mode 100644
index 0000000000000..7ad14f301006f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+streaming-sve < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+streaming-sve -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Scalar FP instructions
+
+fmulx s0, s1, s2
+// CHECK-INST: fmulx s0, s1, s2
+// CHECK-ENCODING: [0x20,0xdc,0x22,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+fmulx d0, d1, d2
+// CHECK-INST: fmulx d0, d1, d2
+// CHECK-ENCODING: [0x20,0xdc,0x62,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecps s0, s1, s2
+// CHECK-INST: frecps s0, s1, s2
+// CHECK-ENCODING: [0x20,0xfc,0x22,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecps d0, d1, d2
+// CHECK-INST: frecps d0, d1, d2
+// CHECK-ENCODING: [0x20,0xfc,0x62,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrts s0, s1, s2
+// CHECK-INST: frsqrts s0, s1, s2
+// CHECK-ENCODING: [0x20,0xfc,0xa2,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrts d0, d1, d2
+// CHECK-INST: frsqrts d0, d1, d2
+// CHECK-ENCODING: [0x20,0xfc,0xe2,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpe s0, s1
+// CHECK-INST: frecpe s0, s1
+// CHECK-ENCODING: [0x20,0xd8,0xa1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpe d0, d1
+// CHECK-INST: frecpe d0, d1
+// CHECK-ENCODING: [0x20,0xd8,0xe1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpx s0, s1
+// CHECK-INST: frecpx s0, s1
+// CHECK-ENCODING: [0x20,0xf8,0xa1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpx d0, d1
+// CHECK-INST: frecpx d0, d1
+// CHECK-ENCODING: [0x20,0xf8,0xe1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrte s0, s1
+// CHECK-INST: frsqrte s0, s1
+// CHECK-ENCODING: [0x20,0xd8,0xa1,0x7e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrte d0, d1
+// CHECK-INST: frsqrte d0, d1
+// CHECK-ENCODING: [0x20,0xd8,0xe1,0x7e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon