[llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
Zuckerman, Michael via llvm-commits
llvm-commits at lists.llvm.org
Sun May 22 07:43:58 PDT 2016
In this test, there was no problem.
But if you try the test below, it fails with the wrong instruction selection (vrcp14pd is selected instead of vrcp14sd).
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_rcp14_sd() {
%res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> <double 1.111110e+02, double -3.000000e+00>, <2 x double> <double -2.000000e+00, double 2.222220e+02>, < 2 x double> zeroinitializer, i8 -1)
ret <2 x double> %res
}
Without the patch
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=skx -o -
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI0_0:
.quad 4637519158471194313 ## double 111.111
.quad -4609434218613702656 ## double -3
.section __TEXT,__text,regular,pure_instructions
.globl _test_rcp14_sd
.p2align 4, 0x90
_test_rcp14_sd: ## @test_rcp14_sd
.cfi_startproc
## BB#0:
vrcp14pd LCPI0_0(%rip), %xmm0
retq
.cfi_endproc
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=knl -o -
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI0_0:
.quad -4611686018427387904 ## double -2
.quad 4642022758098564809 ## double 222.22200000000001
LCPI0_1:
.quad 4637519158471194313 ## double 111.111
.quad -4609434218613702656 ## double -3
.section __TEXT,__text,regular,pure_instructions
.globl _test_rcp14_sd
.p2align 4, 0x90
_test_rcp14_sd: ## @test_rcp14_sd
.cfi_startproc
## BB#0:
vmovaps LCPI0_0(%rip), %xmm0 ## xmm0 = [-2.000000e+00,2.222220e+02]
vmovaps LCPI0_1(%rip), %xmm1 ## xmm1 = [1.111110e+02,-3.000000e+00]
vrcp14sd %xmm0, %xmm1, %xmm0
retq
.cfi_endproc
With patch
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=skx -o -
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI0_0:
.quad -4611686018427387904 ## double -2
.quad 4642022758098564809 ## double 222.22200000000001
LCPI0_1:
.quad 4637519158471194313 ## double 111.111
.quad -4609434218613702656 ## double -3
.section __TEXT,__text,regular,pure_instructions
.globl _test_rcp14_sd
.p2align 4, 0x90
_test_rcp14_sd: ## @test_rcp14_sd
.cfi_startproc
## BB#0:
vmovapd LCPI0_0(%rip), %xmm0 ## xmm0 = [-2.000000e+00,2.222220e+02]
vmovapd LCPI0_1(%rip), %xmm1 ## xmm1 = [1.111110e+02,-3.000000e+00]
vrcp14sd %xmm0, %xmm1, %xmm0
retq
.cfi_endproc
.subsections_via_symbols
-bash-4.2$ llc < test3.ll -mtriple=x86_64-apple-darwin -mcpu=knl -o -
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI0_0:
.quad -4611686018427387904 ## double -2
.quad 4642022758098564809 ## double 222.22200000000001
LCPI0_1:
.quad 4637519158471194313 ## double 111.111
.quad -4609434218613702656 ## double -3
.section __TEXT,__text,regular,pure_instructions
.globl _test_rcp14_sd
.p2align 4, 0x90
_test_rcp14_sd: ## @test_rcp14_sd
.cfi_startproc
## BB#0:
vmovaps LCPI0_0(%rip), %xmm0 ## xmm0 = [-2.000000e+00,2.222220e+02]
vmovaps LCPI0_1(%rip), %xmm1 ## xmm1 = [1.111110e+02,-3.000000e+00]
vrcp14sd %xmm0, %xmm1, %xmm0
retq
.cfi_endproc
Hope this was helpful
Thanks,
Michael Zuckerman
From: Craig Topper [mailto:craig.topper at gmail.com]
Sent: Saturday, May 21, 2016 21:30
To: Zuckerman, Michael <michael.zuckerman at intel.com>
Cc: llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r270322 - [Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
I can't tell from looking at the deleted test cases and the new test cases that it was picking the wrong instruction. What am I missing?
On Sat, May 21, 2016 at 7:44 AM, Michael Zuckerman via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: mzuckerm
Date: Sat May 21 09:44:18 2016
New Revision: 270322
URL: http://llvm.org/viewvc/llvm-project?rev=270322&view=rev
Log:
[Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
Differential Revision: http://reviews.llvm.org/D20438
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 21 09:44:18 2016
@@ -21753,7 +21753,9 @@ const char *X86TargetLowering::getTarget
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat May 21 09:44:18 2016
@@ -250,7 +250,8 @@ namespace llvm {
/// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT, FRCP,
-
+ FRSQRTS, FRCPS,
+
// Thread Local Storage.
TLSADDR,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat May 21 09:44:18 2016
@@ -60,8 +60,8 @@ def X86fandn : SDNode<"X86ISD::FANDN",
[SDNPCommutative, SDNPAssociative]>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
-def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
+def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>;
+def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 21 09:44:18 2016
@@ -2125,8 +2125,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
@@ -2137,8 +2137,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 21 09:44:18 2016
@@ -126,26 +126,6 @@ define <16 x float> @test_rsqrt_ps_512(<
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
-define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rsqrt14_ss:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: retq
- %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rcp14_ss:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: retq
- %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll?rev=270322&r1=270321&r2=270322&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll Sat May 21 09:44:18 2016
@@ -1,6 +1,48 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
+ ; CHECK-LABEL: test_rsqrt14_ss:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
+ ; CHECK-LABEL: test_rcp14_ss:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
+ ; CHECK-LABEL: test_rsqrt14_sd:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vrsqrt14sd %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
+ ; CHECK-LABEL: test_rcp14_sd:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vrcp14sd %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
+ ret <2 x double> %res
+
+}
+declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
--
~Craig
---------------------------------------------------------------------
Intel Israel (74) Limited
This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160522/e882f09c/attachment-0001.html>
More information about the llvm-commits
mailing list