[llvm-commits] [llvm] r56711 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_extract.ll test/CodeGen/X86/vec_shuffle-23.ll
Evan Cheng
evan.cheng at apple.com
Fri Sep 26 16:41:33 PDT 2008
Author: evancheng
Date: Fri Sep 26 18:41:32 2008
New Revision: 56711
URL: http://llvm.org/viewvc/llvm-project?rev=56711&view=rev
Log:
Implement "punpckldq %xmm0, %xmm0" as "pshufd $0x50, %xmm0, %xmm0" unless optimizing for code size.
Added:
llvm/trunk/test/CodeGen/X86/vec_shuffle-23.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vec_extract.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=56711&r1=56710&r2=56711&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Sep 26 18:41:32 2008
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -130,12 +131,17 @@
///
MachineBasicBlock *CurBB;
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
public:
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering, fast),
ContainsFPCode(false), TM(tm),
X86Lowering(*TM.getTargetLowering()),
- Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ OptForSize(OptimizeForSize) {}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -650,6 +656,10 @@
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
CurBB = BB; // BB can change as result of isel.
+ if (!OptForSize) {
+ const Function *F = CurDAG->getMachineFunction().getFunction();
+ OptForSize = !F->isDeclaration() && F->hasNote(Attribute::OptimizeForSize);
+ }
DEBUG(BB->dump());
if (!Fast)
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=56711&r1=56710&r2=56711&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Sep 26 18:41:32 2008
@@ -186,6 +186,7 @@
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSpeed : Predicate<"!OptForSize">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=56711&r1=56710&r2=56711&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Sep 26 18:41:32 2008
@@ -744,7 +744,7 @@
addr:$dst)]>;
let Constraints = "$src1 = $dst" in {
-let AddedComplexity = 15 in {
+let AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -759,7 +759,7 @@
} // AddedComplexity
} // Constraints = "$src1 = $dst"
-let AddedComplexity = 15 in
+let AddedComplexity = 20 in
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
@@ -2921,6 +2921,7 @@
SHUFP_unary_shuffle_mask:$sm),
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
+
// Special binary v4i32 shuffle cases with SHUFPS.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
PSHUFD_binary_shuffle_mask:$sm)),
@@ -2937,11 +2938,21 @@
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
- SHUFP_unary_shuffle_mask:$sm)),
+ SHUFP_unary_shuffle_mask:$sm)),
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask)),
@@ -2958,6 +2969,16 @@
}
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask)),
@@ -2973,7 +2994,7 @@
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
-let AddedComplexity = 15 in {
+let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVHP_shuffle_mask)),
Modified: llvm/trunk/test/CodeGen/X86/vec_extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract.ll?rev=56711&r1=56710&r2=56711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract.ll Fri Sep 26 18:41:32 2008
@@ -4,7 +4,7 @@
; RUN: grep pshufd %t | count 1
; RUN: grep unpckhpd %t | count 1
-define void @test1(<4 x float>* %F, float* %f) {
+define void @test1(<4 x float>* %F, float* %f) nounwind {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
@@ -12,21 +12,21 @@
ret void
}
-define float @test2(<4 x float>* %F, float* %f) {
+define float @test2(<4 x float>* %F, float* %f) nounwind {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
ret float %tmp2
}
-define void @test3(float* %R, <4 x float>* %P1) {
+define void @test3(float* %R, <4 x float>* %P1) nounwind {
%X = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
store float %tmp, float* %R
ret void
}
-define double @test4(double %A) {
+define double @test4(double %A) nounwind {
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
%tmp3 = add double %tmp2, %A ; <double> [#uses=1]
Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-23.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-23.ll?rev=56711&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-23.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-23.ll Fri Sep 26 18:41:32 2008
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -optimize-size | grep punpck
+
+define i32 @t() nounwind {
+entry:
+ %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
+ volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+ %tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %tmp, <4 x i32>* %b
+ %tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
+ %tmp2 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
+ %punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %punpckldq, <4 x i32>* %b
+ %tmp3 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
+ %result = extractelement <4 x i32> %tmp3, i32 0 ; <i32> [#uses=1]
+ ret i32 %result
+}
More information about the llvm-commits
mailing list