[llvm] r192242 - [AArch64] Add support for NEON scalar floating-point reciprocal estimate,

Tue Oct 8 15:09:05 PDT 2013

Author: mcrosier
Date: Tue Oct  8 17:09:04 2013
New Revision: 192242

URL: http://llvm.org/viewvc/llvm-project?rev=192242&view=rev
Log:
[AArch64] Add support for NEON scalar floating-point reciprocal estimate,
reciprocal exponent, and reciprocal square root estimate instructions.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
    llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
    llvm/trunk/test/MC/AArch64/neon-diagnostics.s
    llvm/trunk/test/MC/AArch64/neon-scalar-recip.s
    llvm/trunk/test/MC/Disassembler/AArch64/neon-instructions.txt

Modified: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================

--- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td Tue Oct  8 17:09:04 2013
@@ -164,4 +164,7 @@ def int_aarch64_neon_vcvtf32_u32 :
   Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty], [IntrNoMem]>;
 def int_aarch64_neon_vcvtf64_u64 :
   Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Floating-point Reciprocal Exponent
+def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td Tue Oct  8 17:09:04 2013
@@ -3106,16 +3106,25 @@ multiclass NeonI_Scalar2SameMisc_SD_size
                           [], NoItinerary>;
 }
 
-multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator Sopnode,
-                                                 SDPatternOperator Dopnode,
-                                                 Instruction INSTS,
-                                                 Instruction INSTD> {
+multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
+                                                     SDPatternOperator Dopnode,
+                                                     Instruction INSTS,
+                                                     Instruction INSTD> {
   def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
             (INSTS FPR32:$Rn)>;
   def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
             (INSTD FPR64:$Rn)>;
 }
 
+multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
+                                                 Instruction INSTS,
+                                                 Instruction INSTD> {
+  def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
+            (INSTS FPR32:$Rn)>;
+  def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
+            (INSTD FPR64:$Rn)>;
+}
+
 // Scalar Integer Add
 let isCommutable = 1 in {
 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@@ -3258,15 +3267,30 @@ defm : Neon_Scalar3Same_D_size_patterns<
 
 // Scalar Signed Integer Convert To Floating-point
 defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
-                                             int_aarch64_neon_vcvtf64_s64,
-                                             SCVTFss, SCVTFdd>;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
+                                                 int_aarch64_neon_vcvtf64_s64,
+                                                 SCVTFss, SCVTFdd>;
 
 // Scalar Unsigned Integer Convert To Floating-point
 defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
-                                             int_aarch64_neon_vcvtf64_u64,
-                                             UCVTFss, UCVTFdd>;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
+                                                 int_aarch64_neon_vcvtf64_u64,
+                                                 UCVTFss, UCVTFdd>;
+
+// Scalar Floating-point Reciprocal Estimate
+defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
+                                             FRECPEss, FRECPEdd>;
+
+// Scalar Floating-point Reciprocal Exponent
+defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
+                                             FRECPXss, FRECPXdd>;
+
+// Scalar Floating-point Reciprocal Square Root Estimate
+defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
+                                             FRSQRTEss, FRSQRTEdd>;
 
 // Scalar Reduce Pairwise
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll Tue Oct  8 17:09:04 2013
@@ -45,3 +45,72 @@ define double @test_vrsqrtsd_f64(double
 
 declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
 declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+
+define float @test_vrecpes_f32(float %a) {
+; CHECK: test_vrecpes_f32
+; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i)
+  %0 = extractelement <1 x float> %vrecpe1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrecped_f64(double %a) {
+; CHECK: test_vrecped_f64
+; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i)
+  %0 = extractelement <1 x double> %vrecpe1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>)
+declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
+
+define float @test_vrecpxs_f32(float %a) {
+; CHECK: test_vrecpxs_f32
+; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i)
+  %0 = extractelement <1 x float> %vrecpx1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrecpxd_f64(double %a) {
+; CHECK: test_vrecpxd_f64
+; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrecpx.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i)
+  %0 = extractelement <1 x double> %vrecpx1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>)
+declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>)
+
+define float @test_vrsqrtes_f32(float %a) {
+; CHECK: test_vrsqrtes_f32
+; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i)
+  %0 = extractelement <1 x float> %vrsqrte1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrsqrted_f64(double %a) {
+; CHECK: test_vrsqrted_f64
+; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i)
+  %0 = extractelement <1 x double> %vrsqrte1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>)
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)

Modified: llvm/trunk/test/MC/AArch64/neon-diagnostics.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AArch64/neon-diagnostics.s?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================
--- llvm/trunk/test/MC/AArch64/neon-diagnostics.s (original)
+++ llvm/trunk/test/MC/AArch64/neon-diagnostics.s Tue Oct  8 17:09:04 2013
@@ -213,6 +213,47 @@
 // CHECK-ERROR:         movi v1.16b, #256
 // CHECK-ERROR:                      ^
 
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Estimate
+//----------------------------------------------------------------------
+
+    frecpe s19, h14
+    frecpe d13, s13
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpe s19, h14
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpe d13, s13
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Exponent
+//----------------------------------------------------------------------
+
+    frecpx s18, h10
+    frecpx d16, s19
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpx s18, h10
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpx d16, s19
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Square Root Estimate
+//----------------------------------------------------------------------
+
+    frsqrte s22, h13
+    frsqrte d21, s12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrte s22, h13
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrte d21, s12
+// CHECK-ERROR:                     ^
 
 //----------------------------------------------------------------------
 // Vector Move Immediate - bytemask, per doubleword

Modified: llvm/trunk/test/MC/AArch64/neon-scalar-recip.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AArch64/neon-scalar-recip.s?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================
--- llvm/trunk/test/MC/AArch64/neon-scalar-recip.s (original)
+++ llvm/trunk/test/MC/AArch64/neon-scalar-recip.s Tue Oct  8 17:09:04 2013
@@ -21,3 +21,33 @@
 
 // CHECK: frsqrts s21, s5, s12   // encoding: [0xb5,0xfc,0xac,0x5e]
 // CHECK: frsqrts d8, d22, d18   // encoding: [0xc8,0xfe,0xf2,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Estimate
+//----------------------------------------------------------------------
+
+    frecpe s19, s14
+    frecpe d13, d13
+
+// CHECK: frecpe s19, s14    // encoding: [0xd3,0xd9,0xa1,0x5e]
+// CHECK: frecpe d13, d13    // encoding: [0xad,0xd9,0xe1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Exponent
+//----------------------------------------------------------------------
+
+    frecpx s18, s10
+    frecpx d16, d19
+
+// CHECK: frecpx s18, s10    // encoding: [0x52,0xf9,0xa1,0x5e]
+// CHECK: frecpx d16, d19    // encoding: [0x70,0xfa,0xe1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Square Root Estimate
+//----------------------------------------------------------------------
+
+    frsqrte s22, s13
+    frsqrte d21, d12
+
+// CHECK: frsqrte s22, s13    // encoding: [0xb6,0xd9,0xa1,0x7e]
+// CHECK: frsqrte d21, d12    // encoding: [0x95,0xd9,0xe1,0x7e]

Modified: llvm/trunk/test/MC/Disassembler/AArch64/neon-instructions.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AArch64/neon-instructions.txt?rev=192242&r1=192241&r2=192242&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AArch64/neon-instructions.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AArch64/neon-instructions.txt Tue Oct  8 17:09:04 2013
@@ -1508,3 +1508,27 @@
 # CHECK: ucvtf d21, d14
 0xb6,0xd9,0x21,0x7e
 0xd5,0xd9,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Estimate
+#----------------------------------------------------------------------
+# CHECK: frecpe s19, s14
+# CHECK: frecpe d13, d13
+0xd3,0xd9,0xa1,0x5e
+0xad,0xd9,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Exponent
+#----------------------------------------------------------------------
+# CHECK: frecpx s18, s10
+# CHECK: frecpx d16, d19
+0x52,0xf9,0xa1,0x5e
+0x70,0xfa,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Square Root Estimate
+#----------------------------------------------------------------------
+# CHECK: frsqrte s22, s13
+# CHECK: frsqrte d21, d12
+0xb6,0xd9,0xa1,0x7e
+0x95,0xd9,0xe1,0x7e