[llvm] r318968 - [X86] Support folding to andnps with SSE1 only.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 24 23:20:22 PST 2017
Author: ctopper
Date: Fri Nov 24 23:20:22 2017
New Revision: 318968
URL: http://llvm.org/viewvc/llvm-project?rev=318968&view=rev
Log:
[X86] Support folding to andnps with SSE1 only.
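With SSE1 only, we emit FAND and FXOR nodes for v4f32, so the FAND/FNOT-to-FANDN combine must also accept v4f32 in that configuration in order to select andnps.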
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318968&r1=318967&r2=318968&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov 24 23:20:22 2017
@@ -35033,10 +35033,13 @@ static SDValue combineFAndFNotToFAndn(SD
// Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::f64 && Subtarget.hasSSE2())))
+ (VT == MVT::f64 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2())))
return SDValue();
auto isAllOnesConstantFP = [](SDValue V) {
+ if (V.getSimpleValueType().isVector())
+ return ISD::isBuildVectorAllOnes(V.getNode());
auto *C = dyn_cast<ConstantFPSDNode>(V);
return C && C->getConstantFPValue()->isAllOnesValue();
};
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll?rev=318968&r1=318967&r2=318968&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll Fri Nov 24 23:20:22 2017
@@ -55,14 +55,12 @@ define <4 x float> @test_mm_and_ps(<4 x
define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_andnot_ps:
; X32: # BB#0:
-; X32-NEXT: xorps {{\.LCPI.*}}, %xmm0
-; X32-NEXT: andps %xmm1, %xmm0
+; X32-NEXT: andnps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_andnot_ps:
; X64: # BB#0:
-; X64-NEXT: xorps {{.*}}(%rip), %xmm0
-; X64-NEXT: andps %xmm1, %xmm0
+; X64-NEXT: andnps %xmm1, %xmm0
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
More information about the llvm-commits mailing list