[llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun May 22 08:58:25 PDT 2016


Can you commit those test cases so this fix is checked?

On Sunday, May 22, 2016, Zuckerman, Michael <michael.zuckerman at intel.com>
wrote:

> In this test, there was no problem.
>
> But if you try the test below, it fails with the wrong instruction
> selection (*pd* instead of sd).
>
>
>
> declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>,
> <2 x double>, i8) nounwind readnone
>
>
>
> define <2 x double> @test_rcp14_sd() {
>
>     %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>
> <double 1.111110e+02, double -3.000000e+00>, <2 x double> <double
> -2.000000e+00, double 2.222220e+02>, < 2 x double> zeroinitializer, i8
> -1)
>
>     ret <2 x double> %res
>
> }
>
>
>
> *Without the patch*
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=skx* -o -
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .section        __TEXT,__literal16,16byte_literals
>
>         .p2align        4
>
> LCPI0_0:
>
>         .quad   4637519158471194313     ## double 111.111
>
>         .quad   -4609434218613702656    ## double -3
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .globl  _test_rcp14_sd
>
>         .p2align        4, 0x90
>
> _test_rcp14_sd:                         ## @test_rcp14_sd
>
>         .cfi_startproc
>
> ## BB#0:
>
>         *vrcp14pd*        LCPI0_0(%rip), %xmm0
>
>         retq
>
>         .cfi_endproc
>
>
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=knl* -o -
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .section        __TEXT,__literal16,16byte_literals
>
>         .p2align        4
>
> LCPI0_0:
>
>         .quad   -4611686018427387904    ## double -2
>
>         .quad   4642022758098564809     ## double 222.22200000000001
>
> LCPI0_1:
>
>         .quad   4637519158471194313     ## double 111.111
>
>         .quad   -4609434218613702656    ## double -3
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .globl  _test_rcp14_sd
>
>         .p2align        4, 0x90
>
> _test_rcp14_sd:                         ## @test_rcp14_sd
>
>         .cfi_startproc
>
> ## BB#0:
>
>         vmovaps LCPI0_0(%rip), %xmm0    ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
>         vmovaps LCPI0_1(%rip), %xmm1    ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
>         *vrcp14sd*        %xmm0, %xmm1, %xmm0
>
>         retq
>
>         .cfi_endproc
>
>
>
> *With the patch*
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=skx* -o -
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .section        __TEXT,__literal16,16byte_literals
>
>         .p2align        4
>
> LCPI0_0:
>
>         .quad   -4611686018427387904    ## double -2
>
>         .quad   4642022758098564809     ## double 222.22200000000001
>
> LCPI0_1:
>
>         .quad   4637519158471194313     ## double 111.111
>
>         .quad   -4609434218613702656    ## double -3
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .globl  _test_rcp14_sd
>
>         .p2align        4, 0x90
>
> _test_rcp14_sd:                         ## @test_rcp14_sd
>
>         .cfi_startproc
>
> ## BB#0:
>
>         vmovapd LCPI0_0(%rip), %xmm0    ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
>         vmovapd LCPI0_1(%rip), %xmm1    ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
>         *vrcp14sd*        %xmm0, %xmm1, %xmm0
>
>         retq
>
>         .cfi_endproc
>
>
>
>
>
> .subsections_via_symbols
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=knl* -o -
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .section        __TEXT,__literal16,16byte_literals
>
>         .p2align        4
>
> LCPI0_0:
>
>         .quad   -4611686018427387904    ## double -2
>
>         .quad   4642022758098564809     ## double 222.22200000000001
>
> LCPI0_1:
>
>         .quad   4637519158471194313     ## double 111.111
>
>         .quad   -4609434218613702656    ## double -3
>
>         .section        __TEXT,__text,regular,pure_instructions
>
>         .globl  _test_rcp14_sd
>
>         .p2align        4, 0x90
>
> _test_rcp14_sd:                         ## @test_rcp14_sd
>
>         .cfi_startproc
>
> ## BB#0:
>
>         vmovaps LCPI0_0(%rip), %xmm0    ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
>         vmovaps LCPI0_1(%rip), %xmm1    ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
>         *vrcp14sd*        %xmm0, %xmm1, %xmm0
>
>         retq
>
>         .cfi_endproc
>
>
>
> Hope this was helpful
>
>
>
> Thanks,
>
> Michael Zuckerman
>
>
>
> *From:* Craig Topper [mailto:craig.topper at gmail.com]
> *Sent:* Saturday, May 21, 2016 21:30
> *To:* Zuckerman, Michael <michael.zuckerman at intel.com>
> *Cc:* llvm-commits <llvm-commits at lists.llvm.org>
> *Subject:* Re: [llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and
> sqrt intrinsics.
>
>
>
> I can't tell from looking at the deleted test cases and the new test cases
> that it was picking the wrong instruction. What am I missing?
>
>
>
> On Sat, May 21, 2016 at 7:44 AM, Michael Zuckerman via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
> Author: mzuckerm
> Date: Sat May 21 09:44:18 2016
> New Revision: 270322
>
> URL: http://llvm.org/viewvc/llvm-project?rev=270322&view=rev
> Log:
> [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
>
> Differential Revision: http://reviews.llvm.org/D20438
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.h
>     llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>     llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>     llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 21 09:44:18 2016
> @@ -21753,7 +21753,9 @@ const char *X86TargetLowering::getTarget
>    case X86ISD::FMAXC:              return "X86ISD::FMAXC";
>    case X86ISD::FMINC:              return "X86ISD::FMINC";
>    case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
> +  case X86ISD::FRSQRTS:             return "X86ISD::FRSQRTS";
>    case X86ISD::FRCP:               return "X86ISD::FRCP";
> +  case X86ISD::FRCPS:              return "X86ISD::FRCPS";
>    case X86ISD::EXTRQI:             return "X86ISD::EXTRQI";
>    case X86ISD::INSERTQI:           return "X86ISD::INSERTQI";
>    case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat May 21 09:44:18 2016
> @@ -250,7 +250,8 @@ namespace llvm {
>        /// Note that these typically require refinement
>        /// in order to obtain suitable precision.
>        FRSQRT, FRCP,
> -
> +      FRSQRTS, FRCPS,
> +
>        // Thread Local Storage.
>        TLSADDR,
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat May 21 09:44:18
> 2016
> @@ -60,8 +60,8 @@ def X86fandn   : SDNode<"X86ISD::FANDN",
>                          [SDNPCommutative, SDNPAssociative]>;
>  def X86frsqrt  : SDNode<"X86ISD::FRSQRT",    SDTFPUnaryOp>;
>  def X86frcp    : SDNode<"X86ISD::FRCP",      SDTFPUnaryOp>;
> -def X86frsqrt14s: SDNode<"X86ISD::FRSQRT",  SDTFPBinOp>;
> -def X86frcp14s : SDNode<"X86ISD::FRCP",    SDTFPBinOp>;
> +def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS",  SDTFPBinOp>;
> +def X86frcp14s : SDNode<"X86ISD::FRCPS",    SDTFPBinOp>;
>  def X86fhadd   : SDNode<"X86ISD::FHADD",     SDTFPBinOp>;
>  def X86fhsub   : SDNode<"X86ISD::FHSUB",     SDTFPBinOp>;
>  def X86hadd    : SDNode<"X86ISD::HADD",      SDTIntBinOp>;
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 21 09:44:18 2016
> @@ -2125,8 +2125,8 @@ static const IntrinsicData  IntrinsicsWi
>    X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
>    X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
>    X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
> -  X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCP, 0),
> -  X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCP, 0),
> +  X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCPS, 0),
> +  X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCPS, 0),
>    X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,
> X86ISD::RCP28, 0),
>    X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,
> X86ISD::RCP28, 0),
>    X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,
> X86ISD::RCP28, 0),
> @@ -2137,8 +2137,8 @@ static const IntrinsicData  IntrinsicsWi
>    X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
>    X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
>    X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
> -  X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRT, 0),
> -  X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRT, 0),
> +  X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRTS, 0),
> +  X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRTS, 0),
>    X86_INTRINSIC_DATA(avx512_rsqrt28_pd,
> INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
>    X86_INTRINSIC_DATA(avx512_rsqrt28_ps,
> INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
>    X86_INTRINSIC_DATA(avx512_rsqrt28_sd,
> INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 21 09:44:18
> 2016
> @@ -126,26 +126,6 @@ define <16 x float> @test_rsqrt_ps_512(<
>  }
>  declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x
> float>, i16) nounwind readnone
>
> -define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
> -; CHECK-LABEL: test_rsqrt14_ss:
> -; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
> -; CHECK-NEXT:    retq
> -  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4
> x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
> -  ret <4 x float> %res
> -}
> -declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> -
> -define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
> -; CHECK-LABEL: test_rcp14_ss:
> -; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
> -; CHECK-NEXT:    retq
> -  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x
> float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
> -  ret <4 x float> %res
> -}
> -declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> -
>  define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
>  ; CHECK-LABEL: test_sqrt_pd_512:
>  ; CHECK:       ## BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll Sat May 21
> 09:44:18 2016
> @@ -1,6 +1,48 @@
>  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
>  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
>
> +
> +define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
> +  ; CHECK-LABEL: test_rsqrt14_ss:
> +  ; CHECK:       ## BB#0:
> +  ; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
> +  ; CHECK-NEXT:    retq
> +    %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0,
> <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
> +    ret <4 x float> %res
> +}
> +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> +
> +define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
> +  ; CHECK-LABEL: test_rcp14_ss:
> +  ; CHECK:       ## BB#0:
> +  ; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
> +  ; CHECK-NEXT:    retq
> +    %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4
> x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
> +    ret <4 x float> %res
> +}
> +declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> +
> +define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
> +  ; CHECK-LABEL: test_rsqrt14_sd:
> +  ; CHECK:       ## BB#0:
> +  ; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
> +  ; CHECK-NEXT:    retq
> +    %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>
> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
> +    ret <2 x double> %res
> +}
> +declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x
> double>, <2 x double>, i8) nounwind readnone
> +
> +define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
> +  ; CHECK-LABEL: test_rcp14_sd:
> +  ; CHECK:       ## BB#0:
> +  ; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
> +  ; CHECK-NEXT:    retq
> +    %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0,
> <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
> +    ret <2 x double> %res
> +
> +}
> +declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x
> double>, <2 x double>, i8) nounwind readnone
> +
>  declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x
> float>,<4 x float>, i8, i32)
>  define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4
> x float> %x1, <4 x float> %x3, i8 %x4) {
>    ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
>
>
>
> --
>
> ~Craig
>
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
>


-- 
~Craig
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160522/6d641460/attachment.html>


More information about the llvm-commits mailing list