[llvm] fb82b83 - [AArch64][SME] Support NEON scalar FP instructions in streaming mode
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 23 02:24:31 PDT 2021
Author: Cullen Rhodes
Date: 2021-08-23T08:48:34Z
New Revision: fb82b836b738829a4f006f475723452cf6969280
URL: https://github.com/llvm/llvm-project/commit/fb82b836b738829a4f006f475723452cf6969280
DIFF: https://github.com/llvm/llvm-project/commit/fb82b836b738829a4f006f475723452cf6969280.diff
LOG: [AArch64][SME] Support NEON scalar FP instructions in streaming mode
The following scalar FP instructions are legal in streaming mode:
0101 1110 xx1x xxxx 11x1 11xx xxxx xxxx # FMULX/FRECPS/FRSQRTS (scalar)
0101 1110 x10x xxxx 00x1 11xx xxxx xxxx # FMULX/FRECPS/FRSQRTS (scalar, FP16)
01x1 1110 1x10 0001 11x1 10xx xxxx xxxx # FRECPE/FRSQRTE/FRECPX (scalar)
01x1 1110 1111 1001 11x1 10xx xxxx xxxx # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
Predicate them on `HasNEONorStreamingSVE`. Full list of affected
instructions:
FMULX16, FMULX32, FMULX64, FRECPS16, FRECPS32, FRECPS64, FRSQRTS16,
FRSQRTS32, FRSQRTS64, FRECPEv1f16, FRECPEv1i32, FRECPEv1i64, FRECPXv1f16,
FRECPXv1i32, FRECPXv1i64, FRSQRTEv1f16, FRSQRTEv1i32, FRSQRTEv1i64
Depends on D107902.
The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06/SIMD-FP-Instructions
Execution of NEON instructions that are illegal in streaming mode will
cause a trap or exception. Using FMULX [1] as an example, this check is
at the top of the pseudocode:
    if elements == 1 then
        CheckFPEnabled64();
    else
        CheckFPAdvSIMDEnabled64();
For the legal scalar variants it calls `CheckFPEnabled64`, whereas for the
illegal vector variants it calls `CheckFPAdvSIMDEnabled64`, which traps.
Checking which of these two functions an instruction's pseudocode calls is
a useful way of observing which instructions are/aren't legal in streaming
mode.
[1] https://developer.arm.com/documentation/ddi0602/2021-06/SIMD-FP-Instructions/FMULX--Floating-point-Multiply-extended-
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D108039
Added:
llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
llvm/test/MC/AArch64/SME/streaming-mode-neon.s
Modified:
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9bc2539e95f08..10c6fcd5cacd7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6826,16 +6826,19 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
}
multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+ SDPatternOperator OpNode = null_frag,
+ Predicate pred = HasNEON> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ let Predicates = [pred] in {
def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
[(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]>;
- } // Predicates = [HasNEON, HasFullFP16]
+ }
}
def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -7025,10 +7028,13 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ Predicate pred = HasNEON> {
+ let Predicates = [pred] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0145da8e347fa..79a35246615f2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4624,9 +4624,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -4725,9 +4725,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
new file mode 100644
index 0000000000000..9d838880c5609
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN: | llvm-objdump --mattr=+fullfp16 -d - | FileCheck %s --check-prefix=CHECK-INST
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+streaming-sve,+fullfp16 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Scalar FP instructions
+
+fmulx h0, h1, h2
+// CHECK-INST: fmulx h0, h1, h2
+// CHECK-ENCODING: [0x20,0x1c,0x42,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecps h0, h1, h2
+// CHECK-INST: frecps h0, h1, h2
+// CHECK-ENCODING: [0x20,0x3c,0x42,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frsqrts h0, h1, h2
+// CHECK-INST: frsqrts h0, h1, h2
+// CHECK-ENCODING: [0x20,0x3c,0xc2,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecpe h0, h1
+// CHECK-INST: frecpe h0, h1
+// CHECK-ENCODING: [0x20,0xd8,0xf9,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frecpx h0, h1
+// CHECK-INST: frecpx h0, h1
+// CHECK-ENCODING: [0x20,0xf8,0xf9,0x5e]
+// CHECK-ERROR: instruction requires: fullfp16
+
+frsqrte h0, h1
+// CHECK-INST: frsqrte h0, h1
+// CHECK-ENCODING: [0x20,0xd8,0xf9,0x7e]
+// CHECK-ERROR: instruction requires: fullfp16
diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
new file mode 100644
index 0000000000000..d700c9217611d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Check FABD is illegal in streaming mode
+
+fabd s0, s1, s2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: fabd s0, s1, s2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
new file mode 100644
index 0000000000000..7ad14f301006f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+streaming-sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+streaming-sve -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Scalar FP instructions
+
+fmulx s0, s1, s2
+// CHECK-INST: fmulx s0, s1, s2
+// CHECK-ENCODING: [0x20,0xdc,0x22,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+fmulx d0, d1, d2
+// CHECK-INST: fmulx d0, d1, d2
+// CHECK-ENCODING: [0x20,0xdc,0x62,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecps s0, s1, s2
+// CHECK-INST: frecps s0, s1, s2
+// CHECK-ENCODING: [0x20,0xfc,0x22,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecps d0, d1, d2
+// CHECK-INST: frecps d0, d1, d2
+// CHECK-ENCODING: [0x20,0xfc,0x62,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrts s0, s1, s2
+// CHECK-INST: frsqrts s0, s1, s2
+// CHECK-ENCODING: [0x20,0xfc,0xa2,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrts d0, d1, d2
+// CHECK-INST: frsqrts d0, d1, d2
+// CHECK-ENCODING: [0x20,0xfc,0xe2,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpe s0, s1
+// CHECK-INST: frecpe s0, s1
+// CHECK-ENCODING: [0x20,0xd8,0xa1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpe d0, d1
+// CHECK-INST: frecpe d0, d1
+// CHECK-ENCODING: [0x20,0xd8,0xe1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpx s0, s1
+// CHECK-INST: frecpx s0, s1
+// CHECK-ENCODING: [0x20,0xf8,0xa1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frecpx d0, d1
+// CHECK-INST: frecpx d0, d1
+// CHECK-ENCODING: [0x20,0xf8,0xe1,0x5e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrte s0, s1
+// CHECK-INST: frsqrte s0, s1
+// CHECK-ENCODING: [0x20,0xd8,0xa1,0x7e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
+
+frsqrte d0, d1
+// CHECK-INST: frsqrte d0, d1
+// CHECK-ENCODING: [0x20,0xd8,0xe1,0x7e]
+// CHECK-ERROR: instruction requires: streaming-sve or neon
More information about the llvm-commits mailing list