[llvm] r210226 - R600/SI: Match rsq instructions

Matt Arsenault Matthew.Arsenault at amd.com
Wed Jun 4 17:15:55 PDT 2014


Author: arsenm
Date: Wed Jun  4 19:15:55 2014
New Revision: 210226

URL: http://llvm.org/viewvc/llvm-project?rev=210226&view=rev
Log:
R600/SI: Match rsq instructions

Added:
    llvm/trunk/test/CodeGen/R600/rsq.ll
Modified:
    llvm/trunk/lib/Target/R600/SIInstructions.td

Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=210226&r1=210225&r2=210226&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Wed Jun  4 19:15:55 2014
@@ -1017,12 +1017,16 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 <
   0x0000002d, "V_RSQ_LEGACY_F32",
   [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
 >;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
+  [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))]
+>;
 defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
   [(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
 >;
 defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
+  [(set f64:$dst, (fdiv FP_ONE, (fsqrt f64:$src0)))]
+>;
 defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
 defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
   [(set f32:$dst, (fsqrt f32:$src0))]

Added: llvm/trunk/test/CodeGen/R600/rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rsq.ll?rev=210226&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rsq.ll (added)
+++ llvm/trunk/test/CodeGen/R600/rsq.ll Wed Jun  4 19:15:55 2014
@@ -0,0 +1,26 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; SI-LABEL: @rsq_f32
+; SI: V_RSQ_F32_e32
+; SI: S_ENDPGM
+define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+  %val = load float addrspace(1)* %in, align 4
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @rsq_f64
+; SI: V_RSQ_F64_e32
+; SI: S_ENDPGM
+define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+  %val = load double addrspace(1)* %in, align 4
+  %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
+  %div = fdiv double 1.0, %sqrt
+  store double %div, double addrspace(1)* %out, align 4
+  ret void
+}





More information about the llvm-commits mailing list