[llvm-commits] [llvm] r163473 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_floor.ll
Craig Topper
craig.topper at gmail.com
Sat Sep 8 10:42:27 PDT 2012
Author: ctopper
Date: Sat Sep 8 12:42:27 2012
New Revision: 163473
URL: http://llvm.org/viewvc/llvm-project?rev=163473&view=rev
Log:
Add instruction selection for ffloor of vectors when SSE4.1 or AVX is enabled.
Added:
llvm/trunk/test/CodeGen/X86/vec_floor.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=163473&r1=163472&r2=163473&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Sep 8 12:42:27 2012
@@ -946,6 +946,9 @@
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1023,6 +1026,7 @@
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
@@ -1031,6 +1035,7 @@
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=163473&r1=163472&r2=163473&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Sep 8 12:42:27 2012
@@ -6210,6 +6210,15 @@
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (ffloor VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (ffloor VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x1))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6219,26 +6228,33 @@
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
-def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
-def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
-def : Pat<(f32 (fnearbyint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
-def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
-def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
-def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
-def : Pat<(f32 (frint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
-def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
-def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
-def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+let Predicates = [UseSSE41] in {
+ def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x1))>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
Added: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=163473&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Sat Sep 8 12:42:27 2012
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+
+
+define <2 x double> @floor_v2f64(<2 x double> %p)
+{
+ ; CHECK: floor_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+
+define <4 x float> @floor_v4f32(<4 x float> %p)
+{
+ ; CHECK: floor_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+
+define <4 x double> @floor_v4f64(<4 x double> %p)
+{
+ ; CHECK: floor_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+
+define <8 x float> @floor_v8f32(<8 x float> %p)
+{
+ ; CHECK: floor_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
More information about the llvm-commits
mailing list