commit 3908880228cc5ecd262da8f35d257bd305f52996 Author: Ana Pazos Date: Mon Nov 11 16:14:18 2013 -0800 Implemented AdvSIMD scalar x indexed element format and AdvSIMD scalar copy in MC layer. Added the MC layer tests. Fixed triple setting in test cases diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index b0aadfb..2c8cc6b 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1359,5 +1359,33 @@ class NeonI_Crypto_3VSHA size, bits<3> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD scalar x indexed element +class NeonI_ScalarXIndexedElem opcode, dag outs, dag ins, + string asmstr, list patterns, + InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11111; + let Inst{23} = szhi; + let Inst{22} = szlo; + // l in Inst{21} + // m in Inst{20} + // Inherit Rm in 19-16 + let Inst{15-12} = opcode; + // h in Inst{11} + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} +// Format AdvSIMD scalar copy - insert from element to scalar +class NeonI_ScalarCopy patterns, InstrItinClass itin> + : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { + let Inst{28} = 0b1; +} } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ae217f9..23d81fc 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1278,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), // UBFX makes sense as an implementation of a 64-bit zero-extension too. Could // use either 64-bit or 32-bit variant, but 32-bit might be more efficient. 
-def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), +def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), sub_32)>; //===------------------------------- diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 95e54f2..83bb1fa 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -4671,6 +4671,294 @@ defm : Neon_ScalarPair_SD_size_patterns; +def neon_uimm0_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm0_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm1_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm1_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm2_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm2_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm3_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm4_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + + +// Scalar by element Arithmetic + +class NeonI_ScalarXIndexedElemArith opcode, + string rmlane, bit u, bit szhi, bit szlo, + RegisterClass ResFPR, RegisterClass OpFPR, + RegisterOperand OpVPR, Operand OpImm> + : NeonI_ScalarXIndexedElem { + bits<3> Imm; + bits<5> MRm; +} + +class NeonI_ScalarXIndexedElemArith_Constraint_Impl opcode, + string rmlane, + bit u, bit szhi, bit szlo, + RegisterClass ResFPR, + RegisterClass OpFPR, + RegisterOperand OpVPR, + Operand OpImm> + : NeonI_ScalarXIndexedElem { + let Constraints = "$src = $Rd"; + bits<3> Imm; + bits<5> MRm; +} + +// Scalar Floating Point multiply (scalar, by element) +def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let 
Inst{20-16} = MRm; +} +def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point multiply extended (scalar, by element) +def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point fused multiply-add (scalar, by element) +def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point fused multiply-subtract (scalar, by element) +def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling multiply-add long (scalar, by element) +def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, 
VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling +// multiply-subtract long (scalar, by element) +def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; 
+} + +// Scalar Signed saturating doubling multiply long (scalar, by element) +def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let 
Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating rounding doubling multiply +// returning high half (scalar, by element) +def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + + +// Scalar Copy - DUP element to scalar +class NeonI_Scalar_DUP + : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm), + asmop # "\t$Rd, $Rn." 
# asmlane # "[$Imm]", + [], + NoItinerary> { + bits<4> Imm; +} + +def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} //===----------------------------------------------------------------------===// @@ -4792,36 +5080,6 @@ def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def neon_uimm0_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm1_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm2_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm3_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm4_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - def neon_uimm3 : Operand, ImmLeaf { let ParserMatchClass = uimm3_asmoperand; diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 38845b6..c4f3062 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -82,6 +82,8 @@ 
static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -379,6 +381,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus +DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); +} static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s index 337b94c..1de6909 100644 --- a/test/MC/AArch64/neon-3vdiff.s +++ b/test/MC/AArch64/neon-3vdiff.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 5ada875..0a2332b 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -4667,7 +4667,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s17, h27, s12 // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: too few operands for instruction // CHECK-ERROR: sqdmlal d19, s24, d12 // CHECK-ERROR: ^ @@ -4681,7 +4681,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl s14, h12, s25 // 
CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: too few operands for instruction // CHECK-ERROR: sqdmlsl d12, s23, d13 // CHECK-ERROR: ^ @@ -4695,7 +4695,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s12, h22, s12 // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: too few operands for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ @@ -5687,3 +5687,244 @@ // CHECK-ERROR :4341:17: error: invalid operand for instruction // CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d // CHECK-ERROR ^ + +//---------------------------------------------------------------------- +// Floating Point multiply (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmul s0, s1, v1.h[0] + fmul h0, h1, v1.s[0] + // invalid lane + fmul s2, s29, v10.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmul s0, s1, v1.h[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmul h0, h1, v1.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul s2, s29, v10.s[4] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating Point multiply extended (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmulx d0, d1, v1.b[0] + fmulx h0, h1, v1.d[0] + // invalid lane + fmulx d2, d29, v10.d[3] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx d0, d1, v1.b[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx h0, h1, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx d2, d29, v10.d[3] +// CHECK-ERROR: ^ 
+ +//---------------------------------------------------------------------- +// Floating Point fused multiply-add (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmla b0, b1, v1.b[0] + fmla d30, s11, v1.d[1] + // invalid lane + fmla s16, s22, v16.s[5] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla b0, b1, v1.b[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla d30, s11, v1.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla s16, s22, v16.s[5] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating Point fused multiply-subtract (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmls s29, h10, v28.s[1] + fmls h7, h17, v26.s[2] + // invalid lane + fmls d16, d22, v16.d[-1] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls s29, h10, v28.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls h7, h17, v26.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected lane number +// CHECK-ERROR: fmls d16, d22, v16.d[-1] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply-add long +// (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmlal s0, h0, v0.s[0] + sqdmlal s8, s9, v14.s[1] + // invalid lane + sqdmlal s4, s5, v1.s[5] + // invalid vector index + sqdmlal s0, h0, v17.h[0] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s0, h0, v0.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: 
sqdmlal s8, s9, v14.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal s4, s5, v1.s[5] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s0, h0, v17.h[0] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply-subtract long +// (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmlsl s1, h1, v1.d[0] + sqdmlsl d1, h1, v13.s[0] + // invalid lane + sqdmlsl d1, s1, v13.s[4] + // invalid vector index + sqdmlsl s1, h1, v20.h[7] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl s1, h1, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl d1, h1, v13.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl s1, h1, v20.h[7] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply long (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + // invalid lane + // invalid vector index + // mismatched and invalid vector types + sqdmull s1, h1, v1.s[1] + sqdmull s1, s1, v4.s[0] + // invalid lane + sqdmull s12, h17, v9.h[9] + // invalid vector index + sqdmull s1, h1, v16.h[5] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, h1, v1.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, s1, v4.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: 
sqdmull s12, h17, v9.h[9] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, h1, v16.h[5] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmulh h0, s1, v0.h[0] + sqdmulh s25, s26, v27.h[3] + // invalid lane + sqdmulh s25, s26, v27.s[4] + // invalid vector index + sqdmulh s0, h1, v30.h[0] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh h0, s1, v0.h[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh s25, s26, v27.h[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh s25, s26, v27.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh s0, h1, v30.h[0] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating rounding doubling multiply +// returning high half (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqrdmulh h31, h30, v14.s[2] + sqrdmulh s5, h6, v7.s[2] + // invalid lane + sqrdmulh h31, h30, v14.h[9] + // invalid vector index + sqrdmulh h31, h30, v20.h[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh h31, h30, v14.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh h31, h30, v20.h[4] +// 
CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Duplicate element (scalar) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + dup b0, v1.d[0] + dup h0, v31.b[8] + dup s0, v2.h[4] + dup d0, v17.s[3] + // invalid lane + dup d0, v17.d[4] + dup s0, v1.s[7] + dup h0, v31.h[16] + dup b1, v3.b[16] +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup b0, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup h0, v31.b[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup s0, v2.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup d0, v17.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup d0, v17.d[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup s0, v1.s[7] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup h0, v31.h[16] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup b1, v3.b[16] +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s new file mode 100755 index 0000000..fec9d12 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -0,0 +1,44 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Floating Point fused multiply-add (scalar, by element) +//------------------------------------------------------------------------------ + fmla s0, s1, v1.s[0] + fmla s30, s11, v1.s[1] + fmla s4, s5, v7.s[2] + fmla s16, s22, v16.s[3] + fmla d0, d1, v1.d[0] + fmla d30, d11, v1.d[1] + +// 
CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f] +// CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f] +// CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f] +// CHECK: fmla s16, s22, v16.s[3] // encoding: [0xd0,0x1a,0xb0,0x5f] +// CHECK: fmla d0, d1, v1.d[0] // encoding: [0x20,0x10,0xc1,0x5f] +// CHECK: fmla d30, d11, v1.d[1] // encoding: [0x7e,0x19,0xc1,0x5f] + +//------------------------------------------------------------------------------ +// Floating Point fused multiply-subtract (scalar, by element) +//------------------------------------------------------------------------------ + + fmls s2, s3, v4.s[0] + fmls s29, s10, v28.s[1] + fmls s5, s12, v23.s[2] + fmls s7, s17, v26.s[3] + fmls d0, d1, v1.d[0] + fmls d30, d11, v1.d[1] + +// CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f] +// CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f] +// CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f] +// CHECK: fmls s7, s17, v26.s[3] // encoding: [0x27,0x5a,0xba,0x5f] +// CHECK: fmls d0, d1, v1.d[0] // encoding: [0x20,0x50,0xc1,0x5f] +// CHECK: fmls d30, d11, v1.d[1] // encoding: [0x7e,0x59,0xc1,0x5f] + + + + + + + + diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s new file mode 100755 index 0000000..8b8a3f5 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Floating Point multiply (scalar, by element) +//------------------------------------------------------------------------------ + fmul s0, s1, v1.s[0] + fmul s30, s11, v1.s[1] + fmul s4, s5, v7.s[2] + fmul s16, s22, v16.s[3] + fmul d0, d1, v1.d[0] + fmul d30, d11, v1.d[1] + +// CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f] +// CHECK: fmul s30, s11, v1.s[1] // 
encoding: [0x7e,0x91,0xa1,0x5f] +// CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f] +// CHECK: fmul s16, s22, v16.s[3] // encoding: [0xd0,0x9a,0xb0,0x5f] +// CHECK: fmul d0, d1, v1.d[0] // encoding: [0x20,0x90,0xc1,0x5f] +// CHECK: fmul d30, d11, v1.d[1] // encoding: [0x7e,0x99,0xc1,0x5f] + + +//------------------------------------------------------------------------------ +// Floating Point multiply extended (scalar, by element) +//------------------------------------------------------------------------------ + fmulx s6, s2, v8.s[0] + fmulx s7, s3, v13.s[1] + fmulx s9, s7, v9.s[2] + fmulx s13, s21, v10.s[3] + fmulx d15, d9, v7.d[0] + fmulx d13, d12, v11.d[1] + +// CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f] +// CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f] +// CHECK: fmulx s9, s7, v9.s[2] // encoding: [0xe9,0x98,0x89,0x7f] +// CHECK: fmulx s13, s21, v10.s[3] // encoding: [0xad,0x9a,0xaa,0x7f] +// CHECK: fmulx d15, d9, v7.d[0] // encoding: [0x2f,0x91,0xc7,0x7f] +// CHECK: fmulx d13, d12, v11.d[1] // encoding: [0x8d,0x99,0xcb,0x7f] + diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s new file mode 100755 index 0000000..e3d7e05 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply-add long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmlal s0, h0, v0.h[0] + sqdmlal s7, h1, v4.h[3] + sqdmlal s11, h16, v8.h[4] + sqdmlal s30, h30, v15.h[7] + sqdmlal d0, s0, v3.s[0] + sqdmlal d30, s30, v30.s[3] + sqdmlal d8, s9, v14.s[1] + +// CHECK: sqdmlal s0, h0, v0.h[0] // encoding: [0x00,0x30,0x40,0x5f] +// CHECK: sqdmlal s7, h1, v4.h[3] // encoding: 
[0x27,0x30,0x74,0x5f] +// CHECK: sqdmlal s11, h16, v8.h[4] // encoding: [0x0b,0x3a,0x48,0x5f] +// CHECK: sqdmlal s30, h30, v15.h[7] // encoding: [0xde,0x3b,0x7f,0x5f] +// CHECK: sqdmlal d0, s0, v3.s[0] // encoding: [0x00,0x30,0x83,0x5f] +// CHECK: sqdmlal d30, s30, v30.s[3] // encoding: [0xde,0x3b,0xbe,0x5f] +// CHECK: sqdmlal d8, s9, v14.s[1] // encoding: [0x28,0x31,0xae,0x5f] + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply-subtract long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmlsl s1, h1, v1.h[0] + sqdmlsl s8, h2, v5.h[1] + sqdmlsl s12, h13, v14.h[2] + sqdmlsl s29, h28, v11.h[7] + sqdmlsl d1, s1, v13.s[0] + sqdmlsl d31, s31, v31.s[2] + sqdmlsl d16, s18, v28.s[3] + +// CHECK: sqdmlsl s1, h1, v1.h[0] // encoding: [0x21,0x70,0x41,0x5f] +// CHECK: sqdmlsl s8, h2, v5.h[1] // encoding: [0x48,0x70,0x55,0x5f] +// CHECK: sqdmlsl s12, h13, v14.h[2] // encoding: [0xac,0x71,0x6e,0x5f] +// CHECK: sqdmlsl s29, h28, v11.h[7] // encoding: [0x9d,0x7b,0x7b,0x5f] +// CHECK: sqdmlsl d1, s1, v13.s[0] // encoding: [0x21,0x70,0x8d,0x5f] +// CHECK: sqdmlsl d31, s31, v31.s[2] // encoding: [0xff,0x7b,0x9f,0x5f] +// CHECK: sqdmlsl d16, s18, v28.s[3] // encoding: [0x50,0x7a,0xbc,0x5f] + + + + + + + diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s new file mode 100755 index 0000000..8a8405e --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -0,0 +1,58 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmull s1, h1, v1.h[1] + sqdmull s8, h2, v5.h[2] + sqdmull s12, h17, v9.h[3] + 
sqdmull s31, h31, v15.h[7] + sqdmull d1, s1, v4.s[0] + sqdmull d31, s31, v31.s[3] + sqdmull d9, s10, v15.s[0] + + +// CHECK: sqdmull s1, h1, v1.h[1] // encoding: [0x21,0xb0,0x51,0x5f] +// CHECK: sqdmull s8, h2, v5.h[2] // encoding: [0x48,0xb0,0x65,0x5f] +// CHECK: sqdmull s12, h17, v9.h[3] // encoding: [0x2c,0xb2,0x79,0x5f] +// CHECK: sqdmull s31, h31, v15.h[7] // encoding: [0xff,0xbb,0x7f,0x5f] +// CHECK: sqdmull d1, s1, v4.s[0] // encoding: [0x21,0xb0,0x84,0x5f] +// CHECK: sqdmull d31, s31, v31.s[3] // encoding: [0xff,0xbb,0xbf,0x5f] +// CHECK: sqdmull d9, s10, v15.s[0] // encoding: [0x49,0xb1,0x8f,0x5f] + +//----------------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +//----------------------------------------------------------------------------- + sqdmulh h0, h1, v0.h[0] + sqdmulh h10, h11, v10.h[4] + sqdmulh h20, h21, v15.h[7] + sqdmulh s25, s26, v27.s[3] + sqdmulh s2, s6, v7.s[0] + +// CHECK: sqdmulh h0, h1, v0.h[0] // encoding: [0x20,0xc0,0x40,0x5f] +// CHECK: sqdmulh h10, h11, v10.h[4] // encoding: [0x6a,0xc9,0x4a,0x5f] +// CHECK: sqdmulh h20, h21, v15.h[7] // encoding: [0xb4,0xca,0x7f,0x5f] +// CHECK: sqdmulh s25, s26, v27.s[3] // encoding: [0x59,0xcb,0xbb,0x5f] +// CHECK: sqdmulh s2, s6, v7.s[0] // encoding: [0xc2,0xc0,0x87,0x5f] + +//----------------------------------------------------------------------------- +// Signed saturating rounding doubling multiply returning +// high half (scalar, by element) +//----------------------------------------------------------------------------- + sqrdmulh h31, h30, v14.h[2] + sqrdmulh h1, h1, v1.h[4] + sqrdmulh h21, h22, v15.h[7] + sqrdmulh s5, s6, v7.s[2] + sqrdmulh s20, s26, v27.s[1] + +// CHECK: sqrdmulh h31, h30, v14.h[2] // encoding: [0xdf,0xd3,0x6e,0x5f] +// CHECK: sqrdmulh h1, h1, v1.h[4] // encoding: [0x21,0xd8,0x41,0x5f] +// CHECK: sqrdmulh h21, h22, v15.h[7] // encoding: [0xd5,0xda,0x7f,0x5f] +// 
CHECK: sqrdmulh s5, s6, v7.s[2] // encoding: [0xc5,0xd8,0x87,0x5f] +// CHECK: sqrdmulh s20, s26, v27.s[1] // encoding: [0x54,0xd3,0xbb,0x5f] + + + + + diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s new file mode 100644 index 0000000..64366f2 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Duplicate element (scalar) +//------------------------------------------------------------------------------ + dup b0, v0.b[15] + dup b1, v0.b[7] + dup b17, v0.b[0] + dup h5, v31.h[7] + dup h9, v1.h[4] + dup h11, v17.h[0] + dup s2, v2.s[3] + dup s4, v21.s[0] + dup s31, v21.s[2] + dup d3, v5.d[0] + dup d6, v5.d[1] + +// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e] +// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e] +// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e] +// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e] +// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e] +// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e] +// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e] +// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e] +// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e] +// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e] +// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e] + diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s index 7edcc1b..67fe309 100644 --- a/test/MC/AArch64/neon-simd-copy.s +++ b/test/MC/AArch64/neon-simd-copy.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 
diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index 9e6e1aa..a164323 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 2f53375..b9ea7c1 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -2174,3 +2174,216 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | 0x28,0x78,0x82,0x0e 0x19,0x78,0x82,0x4e 0x0a,0x78,0xc2,0x4e + +#---------------------------------------------------------------------- +# Scalar Floating Point multiply (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmul s0, s1, v1.s[0] +# CHECK: fmul s0, s1, v1.s[3] +# CHECK: fmul d0, d1, v1.d[0] +# CHECK: fmul d0, d1, v1.d[1] +# CHECK: fmul d15, d15, v15.d[1] +0x20 0x90 0x81 0x5f +0x20 0x98 0xa1 0x5f +0x20 0x90 0xc1 0x5f +0x20 0x98 0xc1 0x5f +0xef 0x99 0xcf 0x5f + +#---------------------------------------------------------------------- +# Scalar Floating Point multiply extended (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmulx s3, s5, v7.s[0] +# CHECK: fmulx s3, s5, v7.s[3] +# CHECK: fmulx s3, s5, v15.s[3] +# CHECK: fmulx d0, d4, v8.d[0] +# CHECK: fmulx d0, d4, v8.d[1] +0xa3 0x90 0x87 0x7f +0xa3 0x98 0xa7 0x7f +0xa3 0x98 0xaf 0x7f +0x80 0x90 0xc8 0x7f +0x80 0x98 0xc8 0x7f + +#---------------------------------------------------------------------- +# Scalar Floating Point fused multiply-add (scalar, by element) 
+#---------------------------------------------------------------------- +# CHECK: fmla s0, s1, v1.s[0] +# CHECK: fmla s0, s1, v1.s[3] +# CHECK: fmla d0, d1, v1.d[0] +# CHECK: fmla d0, d1, v1.d[1] +# CHECK: fmla d15, d15, v15.d[1] +0x20 0x10 0x81 0x5f +0x20 0x18 0xa1 0x5f +0x20 0x10 0xc1 0x5f +0x20 0x18 0xc1 0x5f +0xef 0x19 0xcf 0x5f + +#---------------------------------------------------------------------- +# Scalar Floating Point fused multiply-sub (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmls s3, s5, v7.s[0] +# CHECK: fmls s3, s5, v7.s[3] +# CHECK: fmls s3, s5, v15.s[3] +# CHECK: fmls d0, d4, v8.d[0] +# CHECK: fmls d0, d4, v8.d[1] +0xa3 0x50 0x87 0x5f +0xa3 0x58 0xa7 0x5f +0xa3 0x58 0xaf 0x5f +0x80 0x50 0xc8 0x5f +0x80 0x58 0xc8 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling +# multiply-add long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmlal s0, h0, v0.h[0] +# CHECK: sqdmlal s0, h0, v0.h[1] +# CHECK: sqdmlal s0, h0, v0.h[2] +# CHECK: sqdmlal s0, h0, v0.h[3] +# CHECK: sqdmlal s0, h0, v0.h[4] +# CHECK: sqdmlal s0, h0, v0.h[5] +# CHECK: sqdmlal s0, h0, v0.h[6] +# CHECK: sqdmlal s0, h0, v0.h[7] +# CHECK: sqdmlal d8, s9, v15.s[0] +# CHECK: sqdmlal d8, s9, v15.s[1] +# CHECK: sqdmlal d8, s9, v15.s[2] +# CHECK: sqdmlal d8, s9, v15.s[3] +0x00 0x30 0x40 0x5f +0x00 0x30 0x50 0x5f +0x00 0x30 0x60 0x5f +0x00 0x30 0x70 0x5f +0x00 0x38 0x40 0x5f +0x00 0x38 0x50 0x5f +0x00 0x38 0x60 0x5f +0x00 0x38 0x70 0x5f +0x28 0x31 0x8f 0x5f +0x28 0x31 0xaf 0x5f +0x28 0x39 0x8f 0x5f +0x28 0x39 0xaf 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling +# multiply-sub long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmlsl s0, h0, v0.h[0] +# CHECK: sqdmlsl s0, h0, v0.h[1] +# 
CHECK: sqdmlsl s0, h0, v0.h[2] +# CHECK: sqdmlsl s0, h0, v0.h[3] +# CHECK: sqdmlsl s0, h0, v0.h[4] +# CHECK: sqdmlsl s0, h0, v0.h[5] +# CHECK: sqdmlsl s0, h0, v0.h[6] +# CHECK: sqdmlsl s0, h0, v0.h[7] +# CHECK: sqdmlsl d8, s9, v15.s[0] +# CHECK: sqdmlsl d8, s9, v15.s[1] +# CHECK: sqdmlsl d8, s9, v15.s[2] +# CHECK: sqdmlsl d8, s9, v15.s[3] +0x00 0x70 0x40 0x5f +0x00 0x70 0x50 0x5f +0x00 0x70 0x60 0x5f +0x00 0x70 0x70 0x5f +0x00 0x78 0x40 0x5f +0x00 0x78 0x50 0x5f +0x00 0x78 0x60 0x5f +0x00 0x78 0x70 0x5f +0x28 0x71 0x8f 0x5f +0x28 0x71 0xaf 0x5f +0x28 0x79 0x8f 0x5f +0x28 0x79 0xaf 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling multiply long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmull s1, h1, v1.h[0] +# CHECK: sqdmull s1, h1, v1.h[1] +# CHECK: sqdmull s1, h1, v1.h[2] +# CHECK: sqdmull s1, h1, v1.h[3] +# CHECK: sqdmull s1, h1, v1.h[4] +# CHECK: sqdmull s1, h1, v1.h[5] +# CHECK: sqdmull s1, h1, v1.h[6] +# CHECK: sqdmull s1, h1, v1.h[7] +# CHECK: sqdmull d1, s1, v4.s[0] +# CHECK: sqdmull d1, s1, v4.s[1] +# CHECK: sqdmull d1, s1, v4.s[2] +# CHECK: sqdmull d1, s1, v4.s[3] +0x21 0xb0 0x41 0x5f +0x21 0xb0 0x51 0x5f +0x21 0xb0 0x61 0x5f +0x21 0xb0 0x71 0x5f +0x21 0xb8 0x41 0x5f +0x21 0xb8 0x51 0x5f +0x21 0xb8 0x61 0x5f +0x21 0xb8 0x71 0x5f +0x21 0xb0 0x84 0x5f +0x21 0xb0 0xa4 0x5f +0x21 0xb8 0x84 0x5f +0x21 0xb8 0xa4 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling multiply returning +# high half (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmulh h7, h1, v14.h[0] +# CHECK: sqdmulh h7, h15, v8.h[1] +# CHECK: sqdmulh h7, h15, v8.h[2] +# CHECK: sqdmulh h7, h15, v8.h[3] +# CHECK: sqdmulh h7, h15, v8.h[4] +# CHECK: sqdmulh h7, h15, v8.h[5] +# CHECK: sqdmulh h7, h15, v8.h[6] +# CHECK: sqdmulh h7, h15, v8.h[7] +# 
CHECK: sqdmulh s15, s3, v4.s[0] +# CHECK: sqdmulh s15, s14, v16.s[1] +# CHECK: sqdmulh s15, s15, v16.s[2] +# CHECK: sqdmulh s15, s16, v17.s[3] +0x27 0xc0 0x4e 0x5f +0xe7 0xc1 0x58 0x5f +0xe7 0xc1 0x68 0x5f +0xe7 0xc1 0x78 0x5f +0xe7 0xc9 0x48 0x5f +0xe7 0xc9 0x58 0x5f +0xe7 0xc9 0x68 0x5f +0xe7 0xc9 0x78 0x5f +0x6f 0xc0 0x84 0x5f +0xcf 0xc1 0xb0 0x5f +0xef 0xc9 0x90 0x5f +0x0f 0xca 0xb1 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating rounding doubling multiply +# returning high half (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqrdmulh h7, h1, v14.h[0] +# CHECK: sqrdmulh h7, h15, v8.h[1] +# CHECK: sqrdmulh h7, h15, v8.h[2] +# CHECK: sqrdmulh h7, h15, v8.h[3] +# CHECK: sqrdmulh h7, h15, v8.h[4] +# CHECK: sqrdmulh h7, h15, v8.h[5] +# CHECK: sqrdmulh h7, h15, v8.h[6] +# CHECK: sqrdmulh h7, h15, v8.h[7] +# CHECK: sqrdmulh s15, s3, v4.s[0] +# CHECK: sqrdmulh s15, s14, v16.s[1] +# CHECK: sqrdmulh s15, s15, v16.s[2] +# CHECK: sqrdmulh s15, s16, v17.s[3] +0x27 0xd0 0x4e 0x5f +0xe7 0xd1 0x58 0x5f +0xe7 0xd1 0x68 0x5f +0xe7 0xd1 0x78 0x5f +0xe7 0xd9 0x48 0x5f +0xe7 0xd9 0x58 0x5f +0xe7 0xd9 0x68 0x5f +0xe7 0xd9 0x78 0x5f +0x6f 0xd0 0x84 0x5f +0xcf 0xd1 0xb0 0x5f +0xef 0xd9 0x90 0x5f +0x0f 0xda 0xb1 0x5f + +#---------------------------------------------------------------------- +#Duplicate element (scalar) +#---------------------------------------------------------------------- +# CHECK: dup b0, v0.b[15] +# CHECK: dup h2, v31.h[5] +# CHECK: dup s17, v2.s[2] +# CHECK: dup d6, v12.d[1] +0x00 0x04 0x1f 0x5e +0xe2 0x07 0x16 0x5e +0x51 0x04 0x14 0x5e +0x86 0x05 0x18 0x5e +