[llvm] r268539 - [AArch64] Use the reciprocal estimation machinery

Eric Christopher via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 19 17:49:54 PDT 2016


Hi Evandro,

I'm going to ask you to revert this patch please. There are a few reasons:

a) I don't see a review thread here? This changes the port quite a bit.


>  ///
>  AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple
> &TT,
> @@ -149,7 +173,8 @@ AArch64TargetMachine::AArch64TargetMachi
>      : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU,
> FS,
>                          Options, RM, CM, OL),
>        TLOF(createTLOF(getTargetTriple())),
> -      isLittle(LittleEndian) {
> +      Subtarget(TT, CPU, FS, *this, LittleEndian) {
> +  initReciprocals(*this, Subtarget);
>    initAsmInfo();
>  }
>
> @@ -189,7 +214,7 @@ AArch64TargetMachine::getSubtargetImpl(c
>      // function that reside in TargetOptions.
>      resetTargetOptions(F);
>      I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
> -                                            isLittle);
> +                                            Subtarget.isLittleEndian());
>  #ifndef LLVM_BUILD_GLOBAL_ISEL
>     GISelAccessor *GISel = new GISelAccessor();
>  #else
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h?rev=268539&r1=268538&r2=268539&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h Wed May  4
> 15:18:27 2016
> @@ -46,7 +46,7 @@ public:
>    }
>
>  private:
> -  bool isLittle;
> +  AArch64Subtarget Subtarget;
>  };
>
>
These changes aren't correct. There is no need and it's currently invalid
for the AArch64 port to depend on a single global subtarget for all
configuration. This is going to break anything that depends on changing
subtargets on a function by function basis (including different -mcpu flags
in different translation units or different options).

You're going to need to do all of these changes in a different way and I
haven't had time to look to see what's going on.

Thanks!

-eric




>  // AArch64leTargetMachine - AArch64 little endian target machine.
>
> Added: llvm/trunk/test/CodeGen/AArch64/recp-fastmath.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/recp-fastmath.ll?rev=268539&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/recp-fastmath.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/recp-fastmath.ll Wed May  4 15:18:27
> 2016
> @@ -0,0 +1,79 @@
> +; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!div,!vec-div |
> FileCheck %s --check-prefix=FAULT
> +; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=div,vec-div   |
> FileCheck %s
> +
> +define float @frecp(float %x) #0 {
> +  %div = fdiv fast float 1.0, %x
> +  ret float %div
> +
> +; FAULT-LABEL: frecp:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fmov
> +; FAULT-NEXT: fdiv
> +
> +; CHECK-LABEL: frecp:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: frecpe
> +; CHECK-NEXT: fmov
> +}
> +
> +define <2 x float> @f2recp(<2 x float> %x) #0 {
> +  %div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
> +  ret <2 x float> %div
> +
> +; FAULT-LABEL: f2recp:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fmov
> +; FAULT-NEXT: fdiv
> +
> +; CHECK-LABEL: f2recp:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frecpe
> +}
> +
> +define <4 x float> @f4recp(<4 x float> %x) #0 {
> +  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float
> 1.0>, %x
> +  ret <4 x float> %div
> +
> +; FAULT-LABEL: f4recp:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fmov
> +; FAULT-NEXT: fdiv
> +
> +; CHECK-LABEL: f4recp:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frecpe
> +}
> +
> +define double @drecp(double %x) #0 {
> +  %div = fdiv fast double 1.0, %x
> +  ret double %div
> +
> +; FAULT-LABEL: drecp:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fmov
> +; FAULT-NEXT: fdiv
> +
> +; CHECK-LABEL: drecp:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: frecpe
> +; CHECK-NEXT: fmov
> +}
> +
> +define <2 x double> @d2recp(<2 x double> %x) #0 {
> +  %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
> +  ret <2 x double> %div
> +
> +; FAULT-LABEL: d2recp:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fmov
> +; FAULT-NEXT: fdiv
> +
> +; CHECK-LABEL: d2recp:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frecpe
> +}
> +
> +attributes #0 = { nounwind "unsafe-fp-math"="true" }
>
> Added: llvm/trunk/test/CodeGen/AArch64/sqrt-fastmath.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sqrt-fastmath.ll?rev=268539&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/sqrt-fastmath.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/sqrt-fastmath.ll Wed May  4 15:18:27
> 2016
> @@ -0,0 +1,158 @@
> +; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt |
> FileCheck %s --check-prefix=FAULT
> +; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt   |
> FileCheck %s
> +
> +declare float @llvm.sqrt.f32(float) #1
> +declare double @llvm.sqrt.f64(double) #1
> +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
> +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
> +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1
> +
> +define float @fsqrt(float %a) #0 {
> +  %1 = tail call fast float @llvm.sqrt.f32(float %a)
> +  ret float %1
> +
> +; FAULT-LABEL: fsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: fsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <2 x float> @f2sqrt(<2 x float> %a) #0 {
> +  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
> +  ret <2 x float> %1
> +
> +; FAULT-LABEL: f2sqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: f2sqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: mov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <4 x float> @f4sqrt(<4 x float> %a) #0 {
> +  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
> +  ret <4 x float> %1
> +
> +; FAULT-LABEL: f4sqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: f4sqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: mov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define double @dsqrt(double %a) #0 {
> +  %1 = tail call fast double @llvm.sqrt.f64(double %a)
> +  ret double %1
> +
> +; FAULT-LABEL: dsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: dsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <2 x double> @d2sqrt(<2 x double> %a) #0 {
> +  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
> +  ret <2 x double> %1
> +
> +; FAULT-LABEL: d2sqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: d2sqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: mov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define float @frsqrt(float %a) #0 {
> +  %1 = tail call fast float @llvm.sqrt.f32(float %a)
> +  %2 = fdiv fast float 1.000000e+00, %1
> +  ret float %2
> +
> +; FAULT-LABEL: frsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: frsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
> +  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
> +  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
> +  ret <2 x float> %2
> +
> +; FAULT-LABEL: f2rsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: f2rsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
> +  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
> +  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00,
> float 1.000000e+00, float 1.000000e+00>, %1
> +  ret <4 x float> %2
> +
> +; FAULT-LABEL: f4rsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: f4rsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define double @drsqrt(double %a) #0 {
> +  %1 = tail call fast double @llvm.sqrt.f64(double %a)
> +  %2 = fdiv fast double 1.000000e+00, %1
> +  ret double %2
> +
> +; FAULT-LABEL: drsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: drsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
> +  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
> +  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>,
> %1
> +  ret <2 x double> %2
> +
> +; FAULT-LABEL: d2rsqrt:
> +; FAULT-NEXT: BB#0
> +; FAULT-NEXT: fsqrt
> +
> +; CHECK-LABEL: d2rsqrt:
> +; CHECK-NEXT: BB#0
> +; CHECK-NEXT: fmov
> +; CHECK-NEXT: frsqrte
> +}
> +
> +attributes #0 = { nounwind "unsafe-fp-math"="true" }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160920/76246db9/attachment.html>


More information about the llvm-commits mailing list