[llvm] dc5dd77 - [AArch64][SME] Support NEON vector to GPR integer moves in streaming mode

Fri Sep 3 00:59:45 PDT 2021

Author: Cullen Rhodes
Date: 2021-09-03T07:59:17Z
New Revision: dc5dd77ac70089067236cf8071add95f153d5eba

URL: https://github.com/llvm/llvm-project/commit/dc5dd77ac70089067236cf8071add95f153d5eba
DIFF: https://github.com/llvm/llvm-project/commit/dc5dd77ac70089067236cf8071add95f153d5eba.diff

LOG: [AArch64][SME] Support NEON vector to GPR integer moves in streaming mode

A small subset of the NEON instruction set is legal in streaming mode.
This patch adds support for the following vector to integer move
instructions:

  0x00 1110 0000 0001 0010 11xx xxxx xxxx # SMOV W|Xd,Vn.B[0]
  0x00 1110 0000 0010 0010 11xx xxxx xxxx # SMOV W|Xd,Vn.H[0]
  0100 1110 0000 0100 0010 11xx xxxx xxxx # SMOV Xd,Vn.S[0]
  0000 1110 0000 0001 0011 11xx xxxx xxxx # UMOV Wd,Vn.B[0]
  0000 1110 0000 0010 0011 11xx xxxx xxxx # UMOV Wd,Vn.H[0]
  0000 1110 0000 0100 0011 11xx xxxx xxxx # UMOV Wd,Vn.S[0]
  0100 1110 0000 1000 0011 11xx xxxx xxxx # UMOV Xd,Vn.D[0]

Only the zero-index variants are legal; all other indexes are illegal.
To support this, new instructions are defined specifically for the zero
index, which is hardcoded, along with an implicit 'VectorIndex0' operand.
Since the index operand is implicit and takes no bits in the encoding,
custom decoding is required to add the operand.

I'm not sure if this is the best approach but the predicate constraint
on a subset of an operand is unusual. Would be interested to hear some
alternatives.

The instructions are predicated on 'HasNEONorStreamingSVE', i.e. they're
enabled by either +neon or +streaming-sve. This follows on from the work
in D106272 to support the subset of SVE(2) instructions that are legal
in streaming mode.

Depends on D107902.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D107903

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
    llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
    llvm/test/MC/AArch64/SME/streaming-mode-neon.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 84d5ba057765d..855b35cae67a6 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1241,12 +1241,15 @@ multiclass VectorIndex<ValueType ty, AsmOperandClass mc, code pred> {
   def _timm : AsmVectorIndexOpnd<ty, mc>, TImmLeaf<ty, pred>;
 }
 
+def VectorIndex0Operand : AsmVectorIndex<0, 0>;
 def VectorIndex1Operand : AsmVectorIndex<1, 1>;
 def VectorIndexBOperand : AsmVectorIndex<0, 15>;
 def VectorIndexHOperand : AsmVectorIndex<0, 7>;
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
+defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
+                                [{ return ((uint64_t)Imm) == 0; }]>;
 defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
                                 [{ return ((uint64_t)Imm) == 1; }]>;
 defm VectorIndexB : VectorIndex<i64, VectorIndexBOperand,
@@ -7303,6 +7306,25 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
                 (inst regtype:$dst, V128:$src, idxtype:$idx)>;
 
 multiclass SMov {
+  // SMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
+  // streaming mode.
+  let Predicates = [HasNEONorStreamingSVE] in {
+    def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
+      let Inst{20-16} = 0b00001;
+    }
+    def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+      let Inst{20-16} = 0b00001;
+    }
+    def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+      let Inst{20-16} = 0b00010;
+    }
+    def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+      let Inst{20-16} = 0b00010;
+    }
+    def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+      let Inst{20-16} = 0b00100;
+    }
+  }
   def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
     bits<4> idx;
     let Inst{20-17} = idx;
@@ -7331,6 +7353,28 @@ multiclass SMov {
 }
 
 multiclass UMov {
+  // UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
+  // streaming mode.
+  let Predicates = [HasNEONorStreamingSVE] in {
+    def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+      let Inst{20-16} = 0b00001;
+    }
+    def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+      let Inst{20-16} = 0b00010;
+    }
+    def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+      let Inst{20-16} = 0b00100;
+    }
+    def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+      let Inst{20-16} = 0b01000;
+    }
+    def : SIMDMovAlias<"mov", ".s",
+                       !cast<Instruction>(NAME # vi32_idx0),
+                       GPR32, VectorIndex0>;
+    def : SIMDMovAlias<"mov", ".d",
+                       !cast<Instruction>(NAME # vi64_idx0),
+                       GPR64, VectorIndex0>;
+  }
   def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
     bits<4> idx;
     let Inst{20-17} = idx;

diff  --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2f93e81b070ca..1d4033b1f3766 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -5102,6 +5102,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
   case Match_InvalidSVECpyImm64:
     return Error(Loc, "immediate must be an integer in range [-128, 127] or a "
                       "multiple of 256 in range [-32768, 32512]");
+  case Match_InvalidIndexRange0_0:
+    return Error(Loc, "expected lane specifier '[0]'");
   case Match_InvalidIndexRange1_1:
     return Error(Loc, "expected lane specifier '[1]'");
   case Match_InvalidIndexRange0_15:
@@ -5711,6 +5713,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSVECpyImm16:
   case Match_InvalidSVECpyImm32:
   case Match_InvalidSVECpyImm64:
+  case Match_InvalidIndexRange0_0:
   case Match_InvalidIndexRange1_1:
   case Match_InvalidIndexRange0_15:
   case Match_InvalidIndexRange0_7:

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 1f413631d3998..96cbbed871fec 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -323,6 +323,17 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
       //                      ^ insert implicit 8-bit element tile
       MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0));
       break;
+    case AArch64::SMOVvi8to32_idx0:
+    case AArch64::SMOVvi8to64_idx0:
+    case AArch64::SMOVvi16to32_idx0:
+    case AArch64::SMOVvi16to64_idx0:
+    case AArch64::SMOVvi32to64_idx0:
+    case AArch64::UMOVvi8_idx0:
+    case AArch64::UMOVvi16_idx0:
+    case AArch64::UMOVvi32_idx0:
+    case AArch64::UMOVvi64_idx0:
+      MI.addOperand(MCOperand::createImm(0));
+      break;
     }
 
     if (Result != MCDisassembler::Fail)

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
index 25762b43c7ce2..03b35123e5867 100644
--- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s
@@ -15,3 +15,158 @@ bfcvtn v5.4h, v5.4s
 // CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
 // CHECK-NEXT: bfcvtn v5.4h, v5.4s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Check non-zero index is illegal in streaming mode
+// ------------------------------------------------------------------------- //
+// SMOV 8-bit to 32-bit
+
+smov w0, v0.b[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.b[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov w0, v0.b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov w0, v0.b[15]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.b[15]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// SMOV 8-bit to 64-bit
+
+smov x0, v0.b[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.b[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.b[6]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.b[6]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.b[12]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.b[12]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// SMOV 16-bit to 32-bit
+
+smov w0, v0.h[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.h[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov w0, v0.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov w0, v0.h[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov w0, v0.h[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// SMOV 16-bit to 64-bit
+
+smov x0, v0.h[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.h[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.h[6]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.h[6]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// SMOV 32-bit to 64-bit
+
+smov x0, v0.s[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.s[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.s[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.s[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+smov x0, v0.s[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: smov x0, v0.s[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// UMOV 8-bit to 32-bit
+
+umov w0, v0.b[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.b[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.b[15]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.b[15]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// UMOV 16-bit to 32-bit
+
+umov w0, v0.h[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.h[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.h[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.h[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// UMOV 32-bit to 32-bit
+
+umov w0, v0.s[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.s[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.s[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.s[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+umov w0, v0.s[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov w0, v0.s[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// UMOV 64-bit to 64-bit
+
+umov x0, v0.d[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon
+// CHECK-NEXT: umov x0, v0.d[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
index 7ad14f301006f..157fcf966e569 100644
--- a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
@@ -71,3 +71,62 @@ frsqrte d0, d1
 // CHECK-INST: frsqrte d0, d1
 // CHECK-ENCODING: [0x20,0xd8,0xe1,0x7e]
 // CHECK-ERROR: instruction requires: streaming-sve or neon
+
+// Vector to GPR integer move instructions
+
+smov w0, v0.b[0]
+// CHECK-INST: smov w0, v0.b[0]
+// CHECK-ENCODING: [0x00,0x2c,0x01,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+smov x0, v0.b[0]
+// CHECK-INST: smov x0, v0.b[0]
+// CHECK-ENCODING: [0x00,0x2c,0x01,0x4e]
+// CHECK-ERROR: instruction requires: neon
+
+smov w0, v0.h[0]
+// CHECK-INST: smov w0, v0.h[0]
+// CHECK-ENCODING: [0x00,0x2c,0x02,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+smov x0, v0.h[0]
+// CHECK-INST: smov x0, v0.h[0]
+// CHECK-ENCODING: [0x00,0x2c,0x02,0x4e]
+// CHECK-ERROR: instruction requires: neon
+
+smov x0, v0.s[0]
+// CHECK-INST: smov x0, v0.s[0]
+// CHECK-ENCODING: [0x00,0x2c,0x04,0x4e]
+// CHECK-ERROR: instruction requires: neon
+
+umov w0, v0.b[0]
+// CHECK-INST: umov w0, v0.b[0]
+// CHECK-ENCODING: [0x00,0x3c,0x01,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+umov w0, v0.h[0]
+// CHECK-INST: umov w0, v0.h[0]
+// CHECK-ENCODING: [0x00,0x3c,0x02,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+umov w0, v0.s[0]
+// CHECK-INST: mov w0, v0.s[0]
+// CHECK-ENCODING: [0x00,0x3c,0x04,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+umov x0, v0.d[0]
+// CHECK-INST: mov x0, v0.d[0]
+// CHECK-ENCODING: [0x00,0x3c,0x08,0x4e]
+// CHECK-ERROR: instruction requires: neon
+
+// Aliases
+
+mov w0, v0.s[0]
+// CHECK-INST: mov w0, v0.s[0]
+// CHECK-ENCODING: [0x00,0x3c,0x04,0x0e]
+// CHECK-ERROR: instruction requires: neon
+
+mov x0, v0.d[0]
+// CHECK-INST: mov x0, v0.d[0]
+// CHECK-ENCODING: [0x00,0x3c,0x08,0x4e]
+// CHECK-ERROR: instruction requires: neon