[llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.

Sun May 22 07:43:58 PDT 2016

In those test cases, there was no problem.
But if you try the test below, it fails with the wrong instruction being selected (the packed vrcp14pd instead of the scalar vrcp14sd).

declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_rcp14_sd() {
    %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> <double 1.111110e+02, double -3.000000e+00>, <2 x double> <double -2.000000e+00, double 2.222220e+02>, < 2 x double> zeroinitializer, i8 -1)
    ret <2 x double> %res
}

Without the patch:
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=skx -o -
        .section        __TEXT,__text,regular,pure_instructions
        .section        __TEXT,__literal16,16byte_literals
        .p2align        4
LCPI0_0:
        .quad   4637519158471194313     ## double 111.111
        .quad   -4609434218613702656    ## double -3
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _test_rcp14_sd
        .p2align        4, 0x90
_test_rcp14_sd:                         ## @test_rcp14_sd
        .cfi_startproc
## BB#0:
        vrcp14pd        LCPI0_0(%rip), %xmm0
        retq
        .cfi_endproc

-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=knl -o -
        .section        __TEXT,__text,regular,pure_instructions
        .section        __TEXT,__literal16,16byte_literals
        .p2align        4
LCPI0_0:
        .quad   -4611686018427387904    ## double -2
        .quad   4642022758098564809     ## double 222.22200000000001
LCPI0_1:
        .quad   4637519158471194313     ## double 111.111
        .quad   -4609434218613702656    ## double -3
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _test_rcp14_sd
        .p2align        4, 0x90
_test_rcp14_sd:                         ## @test_rcp14_sd
        .cfi_startproc
## BB#0:
        vmovaps LCPI0_0(%rip), %xmm0    ## xmm0 = [-2.000000e+00,2.222220e+02]
        vmovaps LCPI0_1(%rip), %xmm1    ## xmm1 = [1.111110e+02,-3.000000e+00]
        vrcp14sd        %xmm0, %xmm1, %xmm0
        retq
        .cfi_endproc

With the patch:
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=skx -o -
        .section        __TEXT,__text,regular,pure_instructions
        .section        __TEXT,__literal16,16byte_literals
        .p2align        4
LCPI0_0:
        .quad   -4611686018427387904    ## double -2
        .quad   4642022758098564809     ## double 222.22200000000001
LCPI0_1:
        .quad   4637519158471194313     ## double 111.111
        .quad   -4609434218613702656    ## double -3
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _test_rcp14_sd
        .p2align        4, 0x90
_test_rcp14_sd:                         ## @test_rcp14_sd
        .cfi_startproc
## BB#0:
        vmovapd LCPI0_0(%rip), %xmm0    ## xmm0 = [-2.000000e+00,2.222220e+02]
        vmovapd LCPI0_1(%rip), %xmm1    ## xmm1 = [1.111110e+02,-3.000000e+00]
        vrcp14sd        %xmm0, %xmm1, %xmm0
        retq
        .cfi_endproc


.subsections_via_symbols
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=knl -o -
        .section        __TEXT,__text,regular,pure_instructions
        .section        __TEXT,__literal16,16byte_literals
        .p2align        4
LCPI0_0:
        .quad   -4611686018427387904    ## double -2
        .quad   4642022758098564809     ## double 222.22200000000001
LCPI0_1:
        .quad   4637519158471194313     ## double 111.111
        .quad   -4609434218613702656    ## double -3
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _test_rcp14_sd
        .p2align        4, 0x90
_test_rcp14_sd:                         ## @test_rcp14_sd
        .cfi_startproc
## BB#0:
        vmovaps LCPI0_0(%rip), %xmm0    ## xmm0 = [-2.000000e+00,2.222220e+02]
        vmovaps LCPI0_1(%rip), %xmm1    ## xmm1 = [1.111110e+02,-3.000000e+00]
        vrcp14sd        %xmm0, %xmm1, %xmm0
        retq
        .cfi_endproc

Hope this was helpful

Thanks,
Michael Zuckerman

From: Craig Topper [mailto:craig.topper at gmail.com]
Sent: Saturday, May 21, 2016 21:30
To: Zuckerman, Michael <michael.zuckerman at intel.com>
Cc: llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.

I can't tell from looking at the deleted test cases and the new test cases that it was picking the wrong instruction. What am I missing?

On Sat, May 21, 2016 at 7:44 AM, Michael Zuckerman via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: mzuckerm
Date: Sat May 21 09:44:18 2016
New Revision: 270322

URL: http://llvm.org/viewvc/llvm-project?rev=270322&view=rev
Log:
[Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.

Differential Revision: http://reviews.llvm.org/D20438

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 21 09:44:18 2016
@@ -21753,7 +21753,9 @@ const char *X86TargetLowering::getTarget
   case X86ISD::FMAXC:              return "X86ISD::FMAXC";
   case X86ISD::FMINC:              return "X86ISD::FMINC";
   case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
+  case X86ISD::FRSQRTS:             return "X86ISD::FRSQRTS";
   case X86ISD::FRCP:               return "X86ISD::FRCP";
+  case X86ISD::FRCPS:              return "X86ISD::FRCPS";
   case X86ISD::EXTRQI:             return "X86ISD::EXTRQI";
   case X86ISD::INSERTQI:           return "X86ISD::INSERTQI";
   case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat May 21 09:44:18 2016
@@ -250,7 +250,8 @@ namespace llvm {
       /// Note that these typically require refinement
       /// in order to obtain suitable precision.
       FRSQRT, FRCP,
-
+      FRSQRTS, FRCPS,
+
       // Thread Local Storage.
       TLSADDR,


Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat May 21 09:44:18 2016
@@ -60,8 +60,8 @@ def X86fandn   : SDNode<"X86ISD::FANDN",
                         [SDNPCommutative, SDNPAssociative]>;
 def X86frsqrt  : SDNode<"X86ISD::FRSQRT",    SDTFPUnaryOp>;
 def X86frcp    : SDNode<"X86ISD::FRCP",      SDTFPUnaryOp>;
-def X86frsqrt14s: SDNode<"X86ISD::FRSQRT",  SDTFPBinOp>;
-def X86frcp14s : SDNode<"X86ISD::FRCP",    SDTFPBinOp>;
+def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS",  SDTFPBinOp>;
+def X86frcp14s : SDNode<"X86ISD::FRCPS",    SDTFPBinOp>;
 def X86fhadd   : SDNode<"X86ISD::FHADD",     SDTFPBinOp>;
 def X86fhsub   : SDNode<"X86ISD::FHSUB",     SDTFPBinOp>;
 def X86hadd    : SDNode<"X86ISD::HADD",      SDTIntBinOp>;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 21 09:44:18 2016
@@ -2125,8 +2125,8 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
-  X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
-  X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+  X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
+  X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
@@ -2137,8 +2137,8 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 21 09:44:18 2016
@@ -126,26 +126,6 @@ define <16 x float> @test_rsqrt_ps_512(<
 }
 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

-define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rsqrt14_ss:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rcp14_ss:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
 ; CHECK-LABEL: test_sqrt_pd_512:
 ; CHECK:       ## BB#0:

Modified: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll Sat May 21 09:44:18 2016
@@ -1,6 +1,48 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

+
+define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
+  ; CHECK-LABEL: test_rsqrt14_ss:
+  ; CHECK:       ## BB#0:
+  ; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT:    retq
+    %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
+    ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
+  ; CHECK-LABEL: test_rcp14_ss:
+  ; CHECK:       ## BB#0:
+  ; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT:    retq
+    %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
+    ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
+  ; CHECK-LABEL: test_rsqrt14_sd:
+  ; CHECK:       ## BB#0:
+  ; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT:    retq
+    %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
+    ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
+  ; CHECK-LABEL: test_rcp14_sd:
+  ; CHECK:       ## BB#0:
+  ; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT:    retq
+    %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
+    ret <2 x double> %res
+
+}
+declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
   ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:


_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



--
~Craig
---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160522/e882f09c/attachment-0001.html>