[llvm] 87c7730 - [PowerPC] Exploit VSX rounding instrs for rint
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 05:05:49 PST 2020
Author: Qiu Chaofan
Date: 2020-02-13T20:59:50+08:00
New Revision: 87c773082a8d76f1d4b0d74386fcf01205b5f14a
URL: https://github.com/llvm/llvm-project/commit/87c773082a8d76f1d4b0d74386fcf01205b5f14a
DIFF: https://github.com/llvm/llvm-project/commit/87c773082a8d76f1d4b0d74386fcf01205b5f14a.diff
LOG: [PowerPC] Exploit VSX rounding instrs for rint
Exploit the native VSX rounding instructions, x(v|s)r(d|s)pic, which round
to an integral value using the current rounding mode.
According to the C standard library, rint may raise the INEXACT exception,
while nearbyint never does.
Reviewed By: lkail
Differential Revision: https://reviews.llvm.org/D72685
Added:
llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bedd8fb30af4..2b6d370c4bff 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -799,12 +799,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index be6b30ffa08b..b7a4ce4a2788 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2563,6 +2563,14 @@ def : Pat<(f32 (fceil f32:$S)),
def : Pat<(f32 (ftrunc f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIZ
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (frint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+
+// Rounding for double precision.
+def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
}
// Materialize a zero-vector of long long
diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
index b31e864b6769..dfa8fcdfc9c0 100644
--- a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
@@ -11,118 +11,34 @@
define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
; CHECK-LE-LABEL: splat_swap:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: mflr 0
-; CHECK-LE-NEXT: std 0, 16(1)
-; CHECK-LE-NEXT: stdu 1, -80(1)
-; CHECK-LE-NEXT: li 3, 64
-; CHECK-LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; CHECK-LE-NEXT: xvadddp 63, 34, 35
-; CHECK-LE-NEXT: xxlor 1, 63, 63
-; CHECK-LE-NEXT: bl rint
-; CHECK-LE-NEXT: nop
-; CHECK-LE-NEXT: xxswapd 0, 63
-; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-LE-NEXT: li 3, 48
-; CHECK-LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; CHECK-LE-NEXT: fmr 1, 0
-; CHECK-LE-NEXT: bl rint
-; CHECK-LE-NEXT: nop
-; CHECK-LE-NEXT: li 3, 48
-; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; CHECK-LE-NEXT: li 3, 64
-; CHECK-LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; CHECK-LE-NEXT: xxmrghd 0, 0, 1
+; CHECK-LE-NEXT: xvadddp 0, 34, 35
+; CHECK-LE-NEXT: xvrdpic 0, 0
; CHECK-LE-NEXT: xxswapd 1, 0
; CHECK-LE-NEXT: xssubdp 1, 1, 0
-; CHECK-LE-NEXT: addi 1, 1, 80
-; CHECK-LE-NEXT: ld 0, 16(1)
-; CHECK-LE-NEXT: mtlr 0
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: splat_swap:
; CHECK-BE: # %bb.0:
-; CHECK-BE-NEXT: mflr 0
-; CHECK-BE-NEXT: std 0, 16(1)
-; CHECK-BE-NEXT: stdu 1, -160(1)
-; CHECK-BE-NEXT: li 3, 144
-; CHECK-BE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; CHECK-BE-NEXT: xvadddp 63, 34, 35
-; CHECK-BE-NEXT: xxlor 1, 63, 63
-; CHECK-BE-NEXT: bl rint
-; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-BE-NEXT: li 3, 128
-; CHECK-BE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; CHECK-BE-NEXT: xxswapd 1, 63
-; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-BE-NEXT: bl rint
-; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: li 3, 128
-; CHECK-BE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-BE-NEXT: li 3, 144
-; CHECK-BE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; CHECK-BE-NEXT: xxmrghd 0, 0, 1
+; CHECK-BE-NEXT: xvadddp 0, 34, 35
+; CHECK-BE-NEXT: xvrdpic 0, 0
; CHECK-BE-NEXT: xxswapd 1, 0
; CHECK-BE-NEXT: xssubdp 1, 0, 1
-; CHECK-BE-NEXT: addi 1, 1, 160
-; CHECK-BE-NEXT: ld 0, 16(1)
-; CHECK-BE-NEXT: mtlr 0
; CHECK-BE-NEXT: blr
;
; CHECK-P9LE-LABEL: splat_swap:
; CHECK-P9LE: # %bb.0:
-; CHECK-P9LE-NEXT: mflr 0
-; CHECK-P9LE-NEXT: std 0, 16(1)
-; CHECK-P9LE-NEXT: stdu 1, -64(1)
-; CHECK-P9LE-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
-; CHECK-P9LE-NEXT: xvadddp 63, 34, 35
-; CHECK-P9LE-NEXT: xscpsgndp 1, 63, 63
-; CHECK-P9LE-NEXT: bl rint
-; CHECK-P9LE-NEXT: nop
-; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-P9LE-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; CHECK-P9LE-NEXT: xxswapd 1, 63
-; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-P9LE-NEXT: bl rint
-; CHECK-P9LE-NEXT: nop
-; CHECK-P9LE-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-P9LE-NEXT: xxmrghd 0, 0, 1
-; CHECK-P9LE-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; CHECK-P9LE-NEXT: xvadddp 0, 34, 35
+; CHECK-P9LE-NEXT: xvrdpic 0, 0
; CHECK-P9LE-NEXT: xxswapd 1, 0
; CHECK-P9LE-NEXT: xssubdp 1, 1, 0
-; CHECK-P9LE-NEXT: addi 1, 1, 64
-; CHECK-P9LE-NEXT: ld 0, 16(1)
-; CHECK-P9LE-NEXT: mtlr 0
; CHECK-P9LE-NEXT: blr
;
; CHECK-P9BE-LABEL: splat_swap:
; CHECK-P9BE: # %bb.0:
-; CHECK-P9BE-NEXT: mflr 0
-; CHECK-P9BE-NEXT: std 0, 16(1)
-; CHECK-P9BE-NEXT: stdu 1, -144(1)
-; CHECK-P9BE-NEXT: stxv 63, 128(1) # 16-byte Folded Spill
-; CHECK-P9BE-NEXT: xvadddp 63, 34, 35
-; CHECK-P9BE-NEXT: xscpsgndp 1, 63, 63
-; CHECK-P9BE-NEXT: bl rint
-; CHECK-P9BE-NEXT: nop
-; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-P9BE-NEXT: stxv 1, 112(1) # 16-byte Folded Spill
-; CHECK-P9BE-NEXT: xxswapd 1, 63
-; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-P9BE-NEXT: bl rint
-; CHECK-P9BE-NEXT: nop
-; CHECK-P9BE-NEXT: lxv 0, 112(1) # 16-byte Folded Reload
-; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-P9BE-NEXT: xxmrghd 0, 0, 1
-; CHECK-P9BE-NEXT: lxv 63, 128(1) # 16-byte Folded Reload
+; CHECK-P9BE-NEXT: xvadddp 0, 34, 35
+; CHECK-P9BE-NEXT: xvrdpic 0, 0
; CHECK-P9BE-NEXT: xxswapd 1, 0
; CHECK-P9BE-NEXT: xssubdp 1, 0, 1
-; CHECK-P9BE-NEXT: addi 1, 1, 144
-; CHECK-P9BE-NEXT: ld 0, 16(1)
-; CHECK-P9BE-NEXT: mtlr 0
; CHECK-P9BE-NEXT: blr
%added = fadd <2 x double> %x, %y
%call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
index 7f18853b8b99..3874765d75c5 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
@@ -559,3 +559,47 @@ entry:
}
declare float @llvm.ceil.f32(float)
+
+define dso_local double @test_rint(double %d) local_unnamed_addr {
+; BE-LABEL: test_rint:
+; BE: # %bb.0: # %entry
+; BE-NEXT: xsrdpic f1, f1
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_rint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpic f1, f1
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: test_rint:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: xsrdpic f1, f1
+; FAST-NEXT: blr
+entry:
+ %0 = tail call double @llvm.rint.f64(double %d)
+ ret double %0
+}
+
+declare double @llvm.rint.f64(double)
+
+define dso_local float @test_rintf(float %f) local_unnamed_addr {
+; BE-LABEL: test_rintf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: xsrdpic f1, f1
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_rintf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpic f1, f1
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: test_rintf:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: xsrdpic f1, f1
+; FAST-NEXT: blr
+entry:
+ %0 = tail call float @llvm.rint.f32(float %f)
+ ret float %0
+}
+
+declare float @llvm.rint.f32(float)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 253e74cf0bf3..27b5797b5feb 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -4748,34 +4748,20 @@ entry:
define <1 x float> @constrained_vector_rint_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_rint_v1f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -32(1)
; PC64LE-NEXT: addis 3, 2, .LCPI75_0 at toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI75_0 at toc@l(3)
-; PC64LE-NEXT: bl rintf
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: xscvdpspn 0, 1
+; PC64LE-NEXT: lfs 0, .LCPI75_0 at toc@l(3)
+; PC64LE-NEXT: xsrdpic 0, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: addi 1, 1, 32
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v1f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -32(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI75_0 at toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI75_0 at toc@l(3)
-; PC64LE9-NEXT: bl rintf
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: xscvdpspn 0, 1
+; PC64LE9-NEXT: lfs 0, .LCPI75_0 at toc@l(3)
+; PC64LE9-NEXT: xsrdpic 0, 0
+; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT: addi 1, 1, 32
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
@@ -4788,50 +4774,19 @@ entry:
define <2 x double> @constrained_vector_rint_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v2f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI76_0 at toc@ha
-; PC64LE-NEXT: lfd 1, .LCPI76_0 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI76_1 at toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI76_1 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 34, 1, 0
-; PC64LE-NEXT: addi 1, 1, 64
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addi 3, 3, .LCPI76_0 at toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xvrdpic 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v2f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI76_0 at toc@ha
-; PC64LE9-NEXT: lfd 1, .LCPI76_0 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI76_1 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI76_1 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 34, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 48
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI76_0 at toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
+; PC64LE9-NEXT: xvrdpic 34, 0
; PC64LE9-NEXT: blr
entry:
%rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
@@ -4844,80 +4799,50 @@ entry:
define <3 x float> @constrained_vector_rint_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_rint_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -48(1)
-; PC64LE-NEXT: addis 3, 2, .LCPI77_0 at toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI77_0 at toc@l(3)
-; PC64LE-NEXT: bl rintf
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: addis 3, 2, .LCPI77_1 at toc@ha
-; PC64LE-NEXT: fmr 31, 1
-; PC64LE-NEXT: lfs 1, .LCPI77_1 at toc@l(3)
-; PC64LE-NEXT: bl rintf
-; PC64LE-NEXT: nop
; PC64LE-NEXT: addis 3, 2, .LCPI77_2 at toc@ha
-; PC64LE-NEXT: fmr 30, 1
-; PC64LE-NEXT: lfs 1, .LCPI77_2 at toc@l(3)
-; PC64LE-NEXT: bl rintf
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: xscvdpspn 0, 30
+; PC64LE-NEXT: addis 4, 2, .LCPI77_1 at toc@ha
+; PC64LE-NEXT: lfs 0, .LCPI77_2 at toc@l(3)
+; PC64LE-NEXT: lfs 1, .LCPI77_1 at toc@l(4)
+; PC64LE-NEXT: addis 3, 2, .LCPI77_0 at toc@ha
+; PC64LE-NEXT: xsrdpic 0, 0
+; PC64LE-NEXT: lfs 2, .LCPI77_0 at toc@l(3)
; PC64LE-NEXT: addis 3, 2, .LCPI77_3 at toc@ha
-; PC64LE-NEXT: xscvdpspn 1, 1
+; PC64LE-NEXT: xsrdpic 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI77_3 at toc@l
+; PC64LE-NEXT: xsrdpic 2, 2
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 31
+; PC64LE-NEXT: xscvdpspn 0, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 2, 3
+; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
-; PC64LE-NEXT: addi 1, 1, 48
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_0 at toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI77_0 at toc@l(3)
-; PC64LE9-NEXT: bl rintf
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lfs 0, .LCPI77_0 at toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_1 at toc@ha
-; PC64LE9-NEXT: fmr 31, 1
; PC64LE9-NEXT: lfs 1, .LCPI77_1 at toc@l(3)
-; PC64LE9-NEXT: bl rintf
-; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI77_2 at toc@ha
-; PC64LE9-NEXT: fmr 30, 1
-; PC64LE9-NEXT: lfs 1, .LCPI77_2 at toc@l(3)
-; PC64LE9-NEXT: bl rintf
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: xscvdpspn 0, 1
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT: xscvdpspn 0, 30
-; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT: xscvdpspn 0, 31
+; PC64LE9-NEXT: xsrdpic 0, 0
+; PC64LE9-NEXT: lfs 2, .LCPI77_2 at toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_3 at toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI77_3 at toc@l
+; PC64LE9-NEXT: xsrdpic 1, 1
+; PC64LE9-NEXT: xsrdpic 2, 2
+; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 1, 1
+; PC64LE9-NEXT: xscvdpspn 2, 2
+; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: vperm 2, 4, 2, 3
-; PC64LE9-NEXT: addi 1, 1, 48
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
@@ -4930,72 +4855,31 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
define <3 x double> @constrained_vector_rint_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: addis 3, 2, .LCPI78_1 at toc@ha
+; PC64LE-NEXT: addi 3, 3, .LCPI78_1 at toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI78_0 at toc@ha
; PC64LE-NEXT: lfd 1, .LCPI78_0 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI78_1 at toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI78_1 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI78_2 at toc@ha
-; PC64LE-NEXT: xxmrghd 63, 0, 1
-; PC64LE-NEXT: lfd 1, .LCPI78_2 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: xxswapd 0, 63
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: xxlor 2, 63, 63
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: fmr 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xsrdpic 3, 1
+; PC64LE-NEXT: xvrdpic 2, 0
+; PC64LE-NEXT: xxswapd 1, 2
+; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI78_0 at toc@ha
-; PC64LE9-NEXT: lfd 1, .LCPI78_0 at toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lfd 0, .LCPI78_0 at toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI78_1 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI78_1 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI78_2 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 0, 1
-; PC64LE9-NEXT: lfd 1, .LCPI78_2 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xxswapd 1, 63
-; PC64LE9-NEXT: xscpsgndp 2, 63, 63
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: addi 3, 3, .LCPI78_1 at toc@l
+; PC64LE9-NEXT: xsrdpic 3, 0
+; PC64LE9-NEXT: lxvx 0, 0, 3
+; PC64LE9-NEXT: xvrdpic 2, 0
+; PC64LE9-NEXT: xxswapd 1, 2
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: addi 1, 1, 64
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE9-NEXT: blr
entry:
%rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
@@ -5008,86 +4892,28 @@ entry:
define <4 x double> @constrained_vector_rint_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v4f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI79_0 at toc@ha
-; PC64LE-NEXT: lfd 1, .LCPI79_0 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI79_1 at toc@ha
-; PC64LE-NEXT: lfd 1, .LCPI79_1 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI79_2 at toc@ha
-; PC64LE-NEXT: xxmrghd 63, 1, 0
-; PC64LE-NEXT: lfd 1, .LCPI79_2 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI79_3 at toc@ha
-; PC64LE-NEXT: lfd 1, .LCPI79_3 at toc@l(3)
-; PC64LE-NEXT: bl rint
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: vmr 2, 31
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 35, 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addis 4, 2, .LCPI79_1 at toc@ha
+; PC64LE-NEXT: addi 3, 3, .LCPI79_0 at toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: addi 3, 4, .LCPI79_1 at toc@l
+; PC64LE-NEXT: lxvd2x 1, 0, 3
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 1, 1
+; PC64LE-NEXT: xvrdpic 34, 0
+; PC64LE-NEXT: xvrdpic 35, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v4f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI79_0 at toc@ha
-; PC64LE9-NEXT: lfd 1, .LCPI79_0 at toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addi 3, 3, .LCPI79_0 at toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI79_1 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfd 1, .LCPI79_1 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI79_2 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 1, 0
-; PC64LE9-NEXT: lfd 1, .LCPI79_2 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI79_3 at toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfd 1, .LCPI79_3 at toc@l(3)
-; PC64LE9-NEXT: bl rint
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: vmr 2, 31
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 35, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 64
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI79_1 at toc@l
+; PC64LE9-NEXT: xvrdpic 34, 0
+; PC64LE9-NEXT: lxvx 0, 0, 3
+; PC64LE9-NEXT: xvrdpic 35, 0
; PC64LE9-NEXT: blr
entry:
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
diff --git a/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll
new file mode 100644
index 000000000000..8cbee6cc94b8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
+; RUN: FileCheck %s --check-prefix=P9
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
+; RUN: FileCheck %s
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
+; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
+
+define dso_local <2 x double> @test_rint_v2f64(<2 x double> %d) local_unnamed_addr {
+; P9-LABEL: test_rint_v2f64:
+; P9: # %bb.0: # %entry
+; P9-NEXT: xvrdpic v2, v2
+; P9-NEXT: blr
+;
+; CHECK-LABEL: test_rint_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrdpic v2, v2
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: test_rint_v2f64:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: xvrdpic v2, v2
+; FAST-NEXT: blr
+entry:
+ %0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %d)
+ ret <2 x double> %0
+}
+
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+
+
+define dso_local <4 x float> @test_rint_v4f32(<4 x float> %d) local_unnamed_addr {
+; P9-LABEL: test_rint_v4f32:
+; P9: # %bb.0: # %entry
+; P9-NEXT: xvrspic v2, v2
+; P9-NEXT: blr
+;
+; CHECK-LABEL: test_rint_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrspic v2, v2
+; CHECK-NEXT: blr
+;
+; FAST-LABEL: test_rint_v4f32:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: xvrspic v2, v2
+; FAST-NEXT: blr
+entry:
+ %0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %d)
+ ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
More information about the llvm-commits
mailing list