[llvm] r279806 - [X86][SSE] Add CMPSS/CMPSD intrinsic scalar load folding support.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 26 00:08:01 PDT 2016
Author: ctopper
Date: Fri Aug 26 02:08:00 2016
New Revision: 279806
URL: http://llvm.org/viewvc/llvm-project?rev=279806&view=rev
Log:
[X86][SSE] Add CMPSS/CMPSD intrinsic scalar load folding support.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=279806&r1=279805&r2=279806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Aug 26 02:08:00 2016
@@ -1700,6 +1700,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 },
+ { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, 0 },
+ { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, 0 },
{ X86::VANDPDZrr, X86::VANDPDZrm, 0 },
{ X86::VANDPSZrr, X86::VANDPSZrm, 0 },
{ X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
@@ -6189,6 +6191,7 @@ static bool isNonFoldablePartialRegister
// instruction isn't scalar (SS).
switch (UserOpc) {
case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
+ case X86::Int_CMPSSrr: case X86::Int_VCMPSSrr: case X86::VCMPSSZrr_Int:
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
@@ -6213,6 +6216,7 @@ static bool isNonFoldablePartialRegister
// instruction isn't scalar (SD).
switch (UserOpc) {
case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
+ case X86::Int_CMPSDrr: case X86::Int_VCMPSDrr: case X86::VCMPSDZrr_Int:
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
Modified: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll?rev=279806&r1=279805&r2=279806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll Fri Aug 26 02:08:00 2016
@@ -196,3 +196,25 @@ entry:
%1 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %y, <4 x float> %vecinit4.i)
ret <4 x float> %1
}
+
+define <4 x float> @cmpss_fold(float* %x, <4 x float> %y) {
+; X32-LABEL: cmpss_fold:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: cmpeqss (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: cmpss_fold:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: cmpeqss (%rdi), %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = load float, float* %x, align 1
+ %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0.000000e+00, i32 1
+ %vecinit3.i = insertelement <4 x float> %vecinit2.i, float 0.000000e+00, i32 2
+ %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 3
+ %1 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %y, <4 x float> %vecinit4.i, i8 0)
+ ret <4 x float> %1
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
More information about the llvm-commits mailing list.