[llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun May 22 08:58:25 PDT 2016
Can you commit those test cases so this fix is checked?
On Sunday, May 22, 2016, Zuckerman, Michael <michael.zuckerman at intel.com>
wrote:
> In this test, there was no problem.
>
> But if you try the test below, it fails with the wrong instruction
> selection (*pd* instead of *sd*).
>
>
>
> declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>,
> <2 x double>, i8) nounwind readnone
>
>
>
> define <2 x double> @test_rcp14_sd() {
>
> %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>
> <double 1.111110e+02, double -3.000000e+00>, <2 x double> <double
> -2.000000e+00, double 2.222220e+02>, < 2 x double> zeroinitializer, i8
> -1)
>
> ret <2 x double> %res
>
> }
>
>
>
> *Without the patch*
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=skx* -o -
>
> .section __TEXT,__text,regular,pure_instructions
>
> .section __TEXT,__literal16,16byte_literals
>
> .p2align 4
>
> LCPI0_0:
>
> .quad 4637519158471194313 ## double 111.111
>
> .quad -4609434218613702656 ## double -3
>
> .section __TEXT,__text,regular,pure_instructions
>
> .globl _test_rcp14_sd
>
> .p2align 4, 0x90
>
> _test_rcp14_sd: ## @test_rcp14_sd
>
> .cfi_startproc
>
> ## BB#0:
>
> *vrcp14pd* LCPI0_0(%rip), %xmm0
>
> retq
>
> .cfi_endproc
>
>
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=knl* -o -
>
> .section __TEXT,__text,regular,pure_instructions
>
> .section __TEXT,__literal16,16byte_literals
>
> .p2align 4
>
> LCPI0_0:
>
> .quad -4611686018427387904 ## double -2
>
> .quad 4642022758098564809 ## double 222.22200000000001
>
> LCPI0_1:
>
> .quad 4637519158471194313 ## double 111.111
>
> .quad -4609434218613702656 ## double -3
>
> .section __TEXT,__text,regular,pure_instructions
>
> .globl _test_rcp14_sd
>
> .p2align 4, 0x90
>
> _test_rcp14_sd: ## @test_rcp14_sd
>
> .cfi_startproc
>
> ## BB#0:
>
> vmovaps LCPI0_0(%rip), %xmm0 ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
> vmovaps LCPI0_1(%rip), %xmm1 ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
> *vrcp14sd* %xmm0, %xmm1, %xmm0
>
> retq
>
> .cfi_endproc
>
>
>
> *With the patch*
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=skx* -o -
>
> .section __TEXT,__text,regular,pure_instructions
>
> .section __TEXT,__literal16,16byte_literals
>
> .p2align 4
>
> LCPI0_0:
>
> .quad -4611686018427387904 ## double -2
>
> .quad 4642022758098564809 ## double 222.22200000000001
>
> LCPI0_1:
>
> .quad 4637519158471194313 ## double 111.111
>
> .quad -4609434218613702656 ## double -3
>
> .section __TEXT,__text,regular,pure_instructions
>
> .globl _test_rcp14_sd
>
> .p2align 4, 0x90
>
> _test_rcp14_sd: ## @test_rcp14_sd
>
> .cfi_startproc
>
> ## BB#0:
>
> vmovapd LCPI0_0(%rip), %xmm0 ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
> vmovapd LCPI0_1(%rip), %xmm1 ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
> *vrcp14sd* %xmm0, %xmm1, %xmm0
>
> retq
>
> .cfi_endproc
>
>
>
>
>
> .subsections_via_symbols
>
> -bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin *-mcpu=knl* -o -
>
> .section __TEXT,__text,regular,pure_instructions
>
> .section __TEXT,__literal16,16byte_literals
>
> .p2align 4
>
> LCPI0_0:
>
> .quad -4611686018427387904 ## double -2
>
> .quad 4642022758098564809 ## double 222.22200000000001
>
> LCPI0_1:
>
> .quad 4637519158471194313 ## double 111.111
>
> .quad -4609434218613702656 ## double -3
>
> .section __TEXT,__text,regular,pure_instructions
>
> .globl _test_rcp14_sd
>
> .p2align 4, 0x90
>
> _test_rcp14_sd: ## @test_rcp14_sd
>
> .cfi_startproc
>
> ## BB#0:
>
> vmovaps LCPI0_0(%rip), %xmm0 ## xmm0 =
> [-2.000000e+00,2.222220e+02]
>
> vmovaps LCPI0_1(%rip), %xmm1 ## xmm1 =
> [1.111110e+02,-3.000000e+00]
>
> *vrcp14sd* %xmm0, %xmm1, %xmm0
>
> retq
>
> .cfi_endproc
>
>
>
> Hope this was helpful
>
>
>
> Thanks,
>
> Michael Zuckerman
>
>
>
> *From:* Craig Topper [mailto:craig.topper at gmail.com
> <javascript:_e(%7B%7D,'cvml','craig.topper at gmail.com');>]
> *Sent:* Saturday, May 21, 2016 21:30
> *To:* Zuckerman, Michael <michael.zuckerman at intel.com
> <javascript:_e(%7B%7D,'cvml','michael.zuckerman at intel.com');>>
> *Cc:* llvm-commits <llvm-commits at lists.llvm.org
> <javascript:_e(%7B%7D,'cvml','llvm-commits at lists.llvm.org');>>
> *Subject:* Re: [llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and
> sqrt intrinsics.
>
>
>
> I can't tell from looking at the deleted test cases and the new test cases
> that it was picking the wrong instruction. What am I missing?
>
>
>
> On Sat, May 21, 2016 at 7:44 AM, Michael Zuckerman via llvm-commits <
> llvm-commits at lists.llvm.org
> <javascript:_e(%7B%7D,'cvml','llvm-commits at lists.llvm.org');>> wrote:
>
> Author: mzuckerm
> Date: Sat May 21 09:44:18 2016
> New Revision: 270322
>
> URL: http://llvm.org/viewvc/llvm-project?rev=270322&view=rev
> Log:
> [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
>
> Differential Revision: http://reviews.llvm.org/D20438
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 21 09:44:18 2016
> @@ -21753,7 +21753,9 @@ const char *X86TargetLowering::getTarget
> case X86ISD::FMAXC: return "X86ISD::FMAXC";
> case X86ISD::FMINC: return "X86ISD::FMINC";
> case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
> + case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
> case X86ISD::FRCP: return "X86ISD::FRCP";
> + case X86ISD::FRCPS: return "X86ISD::FRCPS";
> case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
> case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
> case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat May 21 09:44:18 2016
> @@ -250,7 +250,8 @@ namespace llvm {
> /// Note that these typically require refinement
> /// in order to obtain suitable precision.
> FRSQRT, FRCP,
> -
> + FRSQRTS, FRCPS,
> +
> // Thread Local Storage.
> TLSADDR,
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat May 21 09:44:18
> 2016
> @@ -60,8 +60,8 @@ def X86fandn : SDNode<"X86ISD::FANDN",
> [SDNPCommutative, SDNPAssociative]>;
> def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
> def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
> -def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
> -def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
> +def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>;
> +def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;
> def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
> def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
> def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 21 09:44:18 2016
> @@ -2125,8 +2125,8 @@ static const IntrinsicData IntrinsicsWi
> X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
> X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
> X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK,
> X86ISD::FRCP, 0),
> - X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCP, 0),
> - X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCP, 0),
> + X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCPS, 0),
> + X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRCPS, 0),
> X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,
> X86ISD::RCP28, 0),
> X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,
> X86ISD::RCP28, 0),
> X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,
> X86ISD::RCP28, 0),
> @@ -2137,8 +2137,8 @@ static const IntrinsicData IntrinsicsWi
> X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK,
> X86ISD::FRSQRT, 0),
> - X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRT, 0),
> - X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRT, 0),
> + X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRTS, 0),
> + X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK,
> X86ISD::FRSQRTS, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_pd,
> INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_ps,
> INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
> X86_INTRINSIC_DATA(avx512_rsqrt28_sd,
> INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 21 09:44:18
> 2016
> @@ -126,26 +126,6 @@ define <16 x float> @test_rsqrt_ps_512(<
> }
> declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x
> float>, i16) nounwind readnone
>
> -define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
> -; CHECK-LABEL: test_rsqrt14_ss:
> -; CHECK: ## BB#0:
> -; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
> -; CHECK-NEXT: retq
> - %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4
> x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
> - ret <4 x float> %res
> -}
> -declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> -
> -define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
> -; CHECK-LABEL: test_rcp14_ss:
> -; CHECK: ## BB#0:
> -; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
> -; CHECK-NEXT: retq
> - %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x
> float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
> - ret <4 x float> %res
> -}
> -declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> -
> define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
> ; CHECK-LABEL: test_sqrt_pd_512:
> ; CHECK: ## BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll Sat May 21
> 09:44:18 2016
> @@ -1,6 +1,48 @@
> ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
> ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
>
> +
> +define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
> + ; CHECK-LABEL: test_rsqrt14_ss:
> + ; CHECK: ## BB#0:
> + ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
> + ; CHECK-NEXT: retq
> + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0,
> <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
> + ret <4 x float> %res
> +}
> +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> +
> +define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
> + ; CHECK-LABEL: test_rcp14_ss:
> + ; CHECK: ## BB#0:
> + ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
> + ; CHECK-NEXT: retq
> + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4
> x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
> + ret <4 x float> %res
> +}
> +declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>,
> <4 x float>, i8) nounwind readnone
> +
> +define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
> + ; CHECK-LABEL: test_rsqrt14_sd:
> + ; CHECK: ## BB#0:
> + ; CHECK-NEXT: vrsqrt14sd %xmm0, %xmm0, %xmm0
> + ; CHECK-NEXT: retq
> + %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>
> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
> + ret <2 x double> %res
> +}
> +declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x
> double>, <2 x double>, i8) nounwind readnone
> +
> +define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
> + ; CHECK-LABEL: test_rcp14_sd:
> + ; CHECK: ## BB#0:
> + ; CHECK-NEXT: vrcp14sd %xmm0, %xmm0, %xmm0
> + ; CHECK-NEXT: retq
> + %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0,
> <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
> + ret <2 x double> %res
> +
> +}
> +declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x
> double>, <2 x double>, i8) nounwind readnone
> +
> declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x
> float>,<4 x float>, i8, i32)
> define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4
> x float> %x1, <4 x float> %x3, i8 %x4) {
> ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> <javascript:_e(%7B%7D,'cvml','llvm-commits at lists.llvm.org');>
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
>
>
>
> --
>
> ~Craig
>
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
>
--
~Craig
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160522/6d641460/attachment.html>
More information about the llvm-commits
mailing list