[llvm] r289419 - [X86] Remove some intrinsic instructions from hasPartialRegUpdate
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 11 21:07:18 PST 2016
Author: ctopper
Date: Sun Dec 11 23:07:17 2016
New Revision: 289419
URL: http://llvm.org/viewvc/llvm-project?rev=289419&view=rev
Log:
[X86] Remove some intrinsic instructions from hasPartialRegUpdate
Summary:
These intrinsic instructions are all selected from intrinsics that have well-defined behavior for where the upper bits of the result come from, and that source is not the same place as the lower bits.
As the test changes below show, we were suppressing load folding for these instructions in some cases. In none of those cases was the separate load helping to avoid a partial dependency on the destination register, so we should just go ahead and allow the load to be folded.
Only foldMemoryOperand was suppressing folding for these. They all have patterns for folding sse_load_f32/f64 that aren't gated with OptForSize, but sse_load_f32/f64 don't allow 128-bit vector loads; they only allow scalar_to_vector and vzmovl of scalar loads to match. There's no reason we can't allow a 128-bit vector load to be narrowed, so I would like to fix sse_load_f32/f64 to allow that. If I do that, it changes some of these same test cases to fold the load too.
Reviewers: spatel, zvi, RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D27611
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Dec 11 23:07:17 2016
@@ -7242,12 +7242,8 @@ static bool hasPartialRegUpdate(unsigned
case X86::CVTSI2SD64rm:
case X86::CVTSD2SSrr:
case X86::CVTSD2SSrm:
- case X86::Int_CVTSD2SSrr:
- case X86::Int_CVTSD2SSrm:
case X86::CVTSS2SDrr:
case X86::CVTSS2SDrm:
- case X86::Int_CVTSS2SDrr:
- case X86::Int_CVTSS2SDrm:
case X86::MOVHPDrm:
case X86::MOVHPSrm:
case X86::MOVLPDrm:
@@ -7258,12 +7254,8 @@ static bool hasPartialRegUpdate(unsigned
case X86::RCPSSm_Int:
case X86::ROUNDSDr:
case X86::ROUNDSDm:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSDm_Int:
case X86::ROUNDSSr:
case X86::ROUNDSSm:
- case X86::ROUNDSSr_Int:
- case X86::ROUNDSSm_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSm:
case X86::RSQRTSSr_Int:
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Sun Dec 11 23:07:17 2016
@@ -1227,14 +1227,12 @@ define <4 x float> @test_mm_cvtsd_ss_loa
; X32-LABEL: test_mm_cvtsd_ss_load:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movaps (%eax), %xmm1
-; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: cvtsd2ss (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsd_ss_load:
; X64: # BB#0:
-; X64-NEXT: movaps (%rdi), %xmm1
-; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: cvtsd2ss (%rdi), %xmm0
; X64-NEXT: retq
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sun Dec 11 23:07:17 2016
@@ -370,8 +370,7 @@ define <4 x float> @test_x86_sse2_cvtsd2
; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
+; SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
; SSE-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse2_cvtsd2ss_load:
@@ -444,8 +443,7 @@ define <2 x double> @test_x86_sse2_cvtss
; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT: cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
+; SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; SSE-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll Sun Dec 11 23:07:17 2016
@@ -467,6 +467,24 @@ define <2 x double> @test_x86_sse41_roun
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
+; SSE41-LABEL: test_x86_sse41_round_sd_load:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE41-NEXT: roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
+; SSE41-NEXT: retl ## encoding: [0xc3]
+;
+; VCHECK-LABEL: test_x86_sse41_round_sd_load:
+; VCHECK: ## BB#0:
+; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; VCHECK-NEXT: vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
+; VCHECK-NEXT: retl ## encoding: [0xc3]
+ %a1b = load <2 x double>, <2 x double>* %a1
+ %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE41-LABEL: test_x86_sse41_round_ss:
; SSE41: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse_partial_update.ll?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse_partial_update.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse_partial_update.ll Sun Dec 11 23:07:17 2016
@@ -98,9 +98,8 @@ declare <2 x double> @llvm.x86.sse2.sqrt
define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
; CHECK-LABEL: load_fold_cvtss2sd_int:
; CHECK: ## BB#0:
-; CHECK-NEXT: movaps (%rdi), %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
; CHECK-NEXT: retq
%ld = load <4 x float>, <4 x float> *%a
%x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
Modified: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll?rev=289419&r1=289418&r2=289419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll Sun Dec 11 23:07:17 2016
@@ -200,8 +200,7 @@ define <4 x float> @test4(<4 x float> %A
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movaps %xmm0, (%esp) ## 16-byte Spill
; X32-NEXT: calll _f
-; X32-NEXT: movaps (%esp), %xmm1 ## 16-byte Reload
-; X32-NEXT: roundss $4, %xmm1, %xmm0
+; X32-NEXT: roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
@@ -211,8 +210,7 @@ define <4 x float> @test4(<4 x float> %A
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
; X64-NEXT: callq _f
-; X64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
-; X64-NEXT: roundss $4, %xmm1, %xmm0
+; X64-NEXT: roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
More information about the llvm-commits
mailing list