[llvm] r200324 - [X86] Add extra rules for combining vselect dag nodes into movsd.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Tue Jan 28 10:14:22 PST 2014
Author: adibiagio
Date: Tue Jan 28 12:14:21 2014
New Revision: 200324
URL: http://llvm.org/viewvc/llvm-project?rev=200324&view=rev
Log:
[X86] Add extra rules for combining vselect dag nodes into movsd.
This improves the fix committed at revision 199683 adding the
following new target specific combine rules:
1) fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
(v4i32 (bitcast (movsd (v2i64 (bitcast A)), (v2i64 (bitcast B))) ))
2) fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
(v4f32 (bitcast (movsd (v2f64 (bitcast A)), (v2f64 (bitcast B))) ))
3) fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
(v4i32 (bitcast (movsd (v2i64 (bitcast B)), (v2i64 (bitcast A))) ))
4) fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
(v4f32 (bitcast (movsd (v2i64 (bitcast B)), (v2i64 (bitcast A))) ))
Added:
llvm/trunk/test/CodeGen/X86/vselect-2.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sse41-blend.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=200324&r1=200323&r2=200324&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 28 12:14:21 2014
@@ -17324,6 +17324,46 @@ static SDValue PerformSELECTCombine(SDNo
return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
}
+
+ if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) {
+ // fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
+ // (v4i32 (bitcast (movsd (v2i64 (bitcast A)),
+ // (v2i64 (bitcast B)))))
+ //
+ // fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
+ // (v4f32 (bitcast (movsd (v2f64 (bitcast A)),
+ // (v2f64 (bitcast B)))))
+ //
+ // fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
+ // (v4i32 (bitcast (movsd (v2i64 (bitcast B)),
+ // (v2i64 (bitcast A)))))
+ //
+ // fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
+ // (v4f32 (bitcast (movsd (v2f64 (bitcast B)),
+ // (v2f64 (bitcast A)))))
+
+ CanFold = (isZero(Cond.getOperand(0)) &&
+ isZero(Cond.getOperand(1)) &&
+ isAllOnes(Cond.getOperand(2)) &&
+ isAllOnes(Cond.getOperand(3)));
+
+ if (!CanFold && isAllOnes(Cond.getOperand(0)) &&
+ isAllOnes(Cond.getOperand(1)) &&
+ isZero(Cond.getOperand(2)) &&
+ isZero(Cond.getOperand(3))) {
+ CanFold = true;
+ std::swap(LHS, RHS);
+ }
+
+ if (CanFold) {
+ EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64;
+ SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS);
+ SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS);
+ SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA,
+ NewB, DAG);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Select);
+ }
+ }
}
}
Modified: llvm/trunk/test/CodeGen/X86/sse41-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-blend.ll?rev=200324&r1=200323&r2=200324&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-blend.ll Tue Jan 28 12:14:21 2014
@@ -13,7 +13,7 @@ define <4 x float> @vsel_float(<4 x floa
;CHECK: blendvps
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
- %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+ %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
ret <4 x i8> %vsel
}
@@ -30,7 +30,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %
;CHECK: blendvps
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
- %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
+ %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
ret <4 x i32> %vsel
}
Added: llvm/trunk/test/CodeGen/X86/vselect-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-2.ll?rev=200324&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vselect-2.ll Tue Jan 28 12:14:21 2014
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=sse2 | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
+ %select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
+ ret <4 x i32> %select
+}
+; CHECK-LABEL: test1
+; CHECK: movsd
+; CHECK: ret
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
+ %select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x i32> %A, <4 x i32> %B
+ ret <4 x i32> %select
+}
+; CHECK-LABEL: test2
+; CHECK: movsd
+; CHECK-NEXT: ret
+
+define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
+ %select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
+ ret <4 x float> %select
+}
+; CHECK-LABEL: test3
+; CHECK: movsd
+; CHECK: ret
+
+define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
+ %select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
+ ret <4 x float> %select
+}
+; CHECK-LABEL: test4
+; CHECK: movsd
+; CHECK-NEXT: ret
More information about the llvm-commits
mailing list