[llvm] r287122 - [x86] add fake scalar FP logic instructions to ReplaceableInstrs to save some bytes
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 09:42:40 PST 2016
Author: spatel
Date: Wed Nov 16 11:42:40 2016
New Revision: 287122
URL: http://llvm.org/viewvc/llvm-project?rev=287122&view=rev
Log:
[x86] add fake scalar FP logic instructions to ReplaceableInstrs to save some bytes
We can replace "scalar" FP-bitwise-logic with other forms of bitwise-logic instructions.
Scalar SSE/AVX FP-logic instructions only exist in your imagination and/or the bowels of
compilers, but logically equivalent int, float, and double variants of bitwise-logic
instructions are a reality in x86, and the float variant may be a shorter instruction
depending on which flavor (SSE or AVX) of vector ISA you have...so just prefer float all
the time.
This is a preliminary step towards solving PR6137:
https://llvm.org/bugs/show_bug.cgi?id=6137
Differential Revision: https://reviews.llvm.org/D26712
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll
llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll
llvm/trunk/test/CodeGen/X86/fp-logic.ll
llvm/trunk/test/CodeGen/X86/fp-select-cmp-and.ll
llvm/trunk/test/CodeGen/X86/sse-minmax.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Nov 16 11:42:40 2016
@@ -7690,12 +7690,16 @@ static const uint16_t ReplaceableInstrs[
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::FsANDNPSrr, X86::FsANDNPDrr,X86::PANDNrr },
{ X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::FsANDPSrr, X86::FsANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::FsORPSrr, X86::FsORPDrr, X86::PORrr },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ { X86::FsXORPSrr, X86::FsXORPDrr, X86::PXORrr },
// AVX 128-bit support
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@@ -7706,12 +7710,16 @@ static const uint16_t ReplaceableInstrs[
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VFsANDNPSrr,X86::VFsANDNPDrr,X86::VPANDNrr },
{ X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VFsANDPSrr, X86::VFsANDPDrr, X86::VPANDrr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::VFsORPSrr, X86::VFsORPDrr, X86::VPORrr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ { X86::VFsXORPSrr, X86::VFsXORPDrr, X86::VPXORrr },
// AVX 256-bit support
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll Wed Nov 16 11:42:40 2016
@@ -30,9 +30,9 @@ define double @select_fcmp_oeq_f64(doubl
; SSE-LABEL: select_fcmp_oeq_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpeqsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_oeq_f64:
@@ -71,10 +71,10 @@ define double @select_fcmp_ogt_f64(doubl
; SSE-LABEL: select_fcmp_ogt_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpltsd %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm1
-; SSE-NEXT: orpd %xmm2, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andps %xmm1, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm2, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ogt_f64:
@@ -113,10 +113,10 @@ define double @select_fcmp_oge_f64(doubl
; SSE-LABEL: select_fcmp_oge_f64:
; SSE: # BB#0:
; SSE-NEXT: cmplesd %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm1
-; SSE-NEXT: orpd %xmm2, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andps %xmm1, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm2, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_oge_f64:
@@ -154,9 +154,9 @@ define double @select_fcmp_olt_f64(doubl
; SSE-LABEL: select_fcmp_olt_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpltsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_olt_f64:
@@ -194,9 +194,9 @@ define double @select_fcmp_ole_f64(doubl
; SSE-LABEL: select_fcmp_ole_f64:
; SSE: # BB#0:
; SSE-NEXT: cmplesd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ole_f64:
@@ -234,9 +234,9 @@ define double @select_fcmp_ord_f64(doubl
; SSE-LABEL: select_fcmp_ord_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ord_f64:
@@ -274,9 +274,9 @@ define double @select_fcmp_uno_f64(doubl
; SSE-LABEL: select_fcmp_uno_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpunordsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_uno_f64:
@@ -314,9 +314,9 @@ define double @select_fcmp_ugt_f64(doubl
; SSE-LABEL: select_fcmp_ugt_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpnlesd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ugt_f64:
@@ -354,9 +354,9 @@ define double @select_fcmp_uge_f64(doubl
; SSE-LABEL: select_fcmp_uge_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpnltsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_uge_f64:
@@ -395,10 +395,10 @@ define double @select_fcmp_ult_f64(doubl
; SSE-LABEL: select_fcmp_ult_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpnlesd %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm1
-; SSE-NEXT: orpd %xmm2, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andps %xmm1, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm2, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ult_f64:
@@ -437,10 +437,10 @@ define double @select_fcmp_ule_f64(doubl
; SSE-LABEL: select_fcmp_ule_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpnltsd %xmm0, %xmm1
-; SSE-NEXT: andpd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm1
-; SSE-NEXT: orpd %xmm2, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: andps %xmm1, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm1
+; SSE-NEXT: orps %xmm2, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_ule_f64:
@@ -478,9 +478,9 @@ define double @select_fcmp_une_f64(doubl
; SSE-LABEL: select_fcmp_une_f64:
; SSE: # BB#0:
; SSE-NEXT: cmpneqsd %xmm1, %xmm0
-; SSE-NEXT: andpd %xmm0, %xmm2
-; SSE-NEXT: andnpd %xmm3, %xmm0
-; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm2
+; SSE-NEXT: andnps %xmm3, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_fcmp_une_f64:
Modified: llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll Wed Nov 16 11:42:40 2016
@@ -3,20 +3,20 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; Test that we can replace "scalar" FP-bitwise-logic with the optimal instruction.
-; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels
+; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels
; of compilers, but float and double variants of FP-logic instructions are reality
-; and float may be a shorter instruction depending on which flavor of vector ISA
-; you have...so just prefer float all the time, ok? Yay, x86!
+; and float may be a shorter instruction depending on which flavor of vector ISA
+; you have...so just prefer float all the time, ok? Yay, x86!
define double @FsANDPSrr(double %x, double %y) {
; SSE-LABEL: FsANDPSrr:
; SSE: # BB#0:
-; SSE-NEXT: andpd %xmm1, %xmm0
+; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: FsANDPSrr:
; AVX: # BB#0:
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
%bc1 = bitcast double %x to i64
@@ -56,12 +56,12 @@ define double @FsANDNPSrr(double %x, dou
define double @FsORPSrr(double %x, double %y) {
; SSE-LABEL: FsORPSrr:
; SSE: # BB#0:
-; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: FsORPSrr:
; AVX: # BB#0:
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
%bc1 = bitcast double %x to i64
@@ -74,12 +74,12 @@ define double @FsORPSrr(double %x, doubl
define double @FsXORPSrr(double %x, double %y) {
; SSE-LABEL: FsXORPSrr:
; SSE: # BB#0:
-; SSE-NEXT: xorpd %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: FsXORPSrr:
; AVX: # BB#0:
-; AVX-NEXT: vxorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
%bc1 = bitcast double %x to i64
Modified: llvm/trunk/test/CodeGen/X86/fp-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic.ll?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic.ll Wed Nov 16 11:42:40 2016
@@ -223,7 +223,7 @@ define float @f7_xor(float %x) {
define double @doubles(double %x, double %y) {
; CHECK-LABEL: doubles:
; CHECK: # BB#0:
-; CHECK-NEXT: andpd %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast double %x to i64
Modified: llvm/trunk/test/CodeGen/X86/fp-select-cmp-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-select-cmp-and.ll?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-select-cmp-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-select-cmp-and.ll Wed Nov 16 11:42:40 2016
@@ -5,7 +5,7 @@ define double @test1(double %a, double %
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: cmpltsd %xmm2, %xmm0
-; CHECK-NEXT: andpd %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp olt double %a, %eps
@@ -17,7 +17,7 @@ define double @test2(double %a, double %
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: cmplesd %xmm2, %xmm0
-; CHECK-NEXT: andpd %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp ole double %a, %eps
@@ -29,8 +29,8 @@ define double @test3(double %a, double %
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: cmpltsd %xmm0, %xmm2
-; CHECK-NEXT: andpd %xmm1, %xmm2
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp ogt double %a, %eps
@@ -42,8 +42,8 @@ define double @test4(double %a, double %
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: cmplesd %xmm0, %xmm2
-; CHECK-NEXT: andpd %xmm1, %xmm2
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp oge double %a, %eps
@@ -55,7 +55,7 @@ define double @test5(double %a, double %
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: cmpltsd %xmm2, %xmm0
-; CHECK-NEXT: andnpd %xmm1, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp olt double %a, %eps
@@ -67,7 +67,7 @@ define double @test6(double %a, double %
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: cmplesd %xmm2, %xmm0
-; CHECK-NEXT: andnpd %xmm1, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp ole double %a, %eps
@@ -79,8 +79,8 @@ define double @test7(double %a, double %
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: cmpltsd %xmm0, %xmm2
-; CHECK-NEXT: andnpd %xmm1, %xmm2
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp ogt double %a, %eps
@@ -92,8 +92,8 @@ define double @test8(double %a, double %
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: cmplesd %xmm0, %xmm2
-; CHECK-NEXT: andnpd %xmm1, %xmm2
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: andnps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp oge double %a, %eps
@@ -220,10 +220,10 @@ define double @test18(double %a, double
; CHECK-LABEL: test18:
; CHECK: # BB#0:
; CHECK-NEXT: cmplesd %xmm0, %xmm3
-; CHECK-NEXT: andpd %xmm3, %xmm2
-; CHECK-NEXT: andnpd %xmm1, %xmm3
-; CHECK-NEXT: orpd %xmm2, %xmm3
-; CHECK-NEXT: movapd %xmm3, %xmm0
+; CHECK-NEXT: andps %xmm3, %xmm2
+; CHECK-NEXT: andnps %xmm1, %xmm3
+; CHECK-NEXT: orps %xmm2, %xmm3
+; CHECK-NEXT: movaps %xmm3, %xmm0
; CHECK-NEXT: retq
;
%cmp = fcmp oge double %a, %eps
Modified: llvm/trunk/test/CodeGen/X86/sse-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-minmax.ll?rev=287122&r1=287121&r2=287122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-minmax.ll Wed Nov 16 11:42:40 2016
@@ -84,11 +84,11 @@ define double @olt_inverse(double %x, do
define double @oge(double %x, double %y) {
; STRICT-LABEL: oge:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm1, %xmm2
; STRICT-NEXT: cmplesd %xmm0, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm0
-; STRICT-NEXT: andnpd %xmm1, %xmm2
-; STRICT-NEXT: orpd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm0
+; STRICT-NEXT: andnps %xmm1, %xmm2
+; STRICT-NEXT: orps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: oge:
@@ -104,12 +104,12 @@ define double @oge(double %x, double %y)
define double @ole(double %x, double %y) {
; STRICT-LABEL: ole:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm0, %xmm2
; STRICT-NEXT: cmplesd %xmm1, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm0
-; STRICT-NEXT: andnpd %xmm1, %xmm2
-; STRICT-NEXT: orpd %xmm0, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm0
+; STRICT-NEXT: andnps %xmm1, %xmm2
+; STRICT-NEXT: orps %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ole:
@@ -125,12 +125,12 @@ define double @ole(double %x, double %y)
define double @oge_inverse(double %x, double %y) {
; STRICT-LABEL: oge_inverse:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm1, %xmm2
; STRICT-NEXT: cmplesd %xmm0, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm2
-; STRICT-NEXT: orpd %xmm1, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm1
+; STRICT-NEXT: andnps %xmm0, %xmm2
+; STRICT-NEXT: orps %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: oge_inverse:
@@ -152,12 +152,12 @@ define double @oge_inverse(double %x, do
define double @ole_inverse(double %x, double %y) {
; STRICT-LABEL: ole_inverse:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm0, %xmm2
; STRICT-NEXT: cmplesd %xmm1, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm2
-; STRICT-NEXT: orpd %xmm1, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm1
+; STRICT-NEXT: andnps %xmm0, %xmm2
+; STRICT-NEXT: orps %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ole_inverse:
@@ -257,7 +257,7 @@ define double @oge_x(double %x) {
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm1, %xmm1
; STRICT-NEXT: cmplesd %xmm0, %xmm1
-; STRICT-NEXT: andpd %xmm1, %xmm0
+; STRICT-NEXT: andps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: oge_x:
@@ -275,10 +275,10 @@ define double @ole_x(double %x) {
; STRICT-LABEL: ole_x:
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm2, %xmm2
-; STRICT-NEXT: movapd %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm0, %xmm1
; STRICT-NEXT: cmplesd %xmm2, %xmm1
-; STRICT-NEXT: andpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ole_x:
@@ -297,8 +297,8 @@ define double @oge_inverse_x(double %x)
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm1, %xmm1
; STRICT-NEXT: cmplesd %xmm0, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andnps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: oge_inverse_x:
@@ -323,10 +323,10 @@ define double @ole_inverse_x(double %x)
; STRICT-LABEL: ole_inverse_x:
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm2, %xmm2
-; STRICT-NEXT: movapd %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm0, %xmm1
; STRICT-NEXT: cmplesd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andnps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ole_inverse_x:
@@ -350,12 +350,12 @@ define double @ole_inverse_x(double %x)
define double @ugt(double %x, double %y) {
; STRICT-LABEL: ugt:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm0, %xmm2
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm0
-; STRICT-NEXT: andnpd %xmm1, %xmm2
-; STRICT-NEXT: orpd %xmm0, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm0
+; STRICT-NEXT: andnps %xmm1, %xmm2
+; STRICT-NEXT: orps %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ugt:
@@ -371,11 +371,11 @@ define double @ugt(double %x, double %y)
define double @ult(double %x, double %y) {
; STRICT-LABEL: ult:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm1, %xmm2
; STRICT-NEXT: cmpnlesd %xmm0, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm0
-; STRICT-NEXT: andnpd %xmm1, %xmm2
-; STRICT-NEXT: orpd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm0
+; STRICT-NEXT: andnps %xmm1, %xmm2
+; STRICT-NEXT: orps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ult:
@@ -391,12 +391,12 @@ define double @ult(double %x, double %y)
define double @ugt_inverse(double %x, double %y) {
; STRICT-LABEL: ugt_inverse:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm0, %xmm2
+; STRICT-NEXT: movaps %xmm0, %xmm2
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm2
-; STRICT-NEXT: orpd %xmm1, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm1
+; STRICT-NEXT: andnps %xmm0, %xmm2
+; STRICT-NEXT: orps %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ugt_inverse:
@@ -418,12 +418,12 @@ define double @ugt_inverse(double %x, do
define double @ult_inverse(double %x, double %y) {
; STRICT-LABEL: ult_inverse:
; STRICT: # BB#0:
-; STRICT-NEXT: movapd %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm1, %xmm2
; STRICT-NEXT: cmpnlesd %xmm0, %xmm2
-; STRICT-NEXT: andpd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm2
-; STRICT-NEXT: orpd %xmm1, %xmm2
-; STRICT-NEXT: movapd %xmm2, %xmm0
+; STRICT-NEXT: andps %xmm2, %xmm1
+; STRICT-NEXT: andnps %xmm0, %xmm2
+; STRICT-NEXT: orps %xmm1, %xmm2
+; STRICT-NEXT: movaps %xmm2, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ult_inverse:
@@ -524,10 +524,10 @@ define double @ugt_x(double %x) {
; STRICT-LABEL: ugt_x:
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm2, %xmm2
-; STRICT-NEXT: movapd %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm0, %xmm1
; STRICT-NEXT: cmpnlesd %xmm2, %xmm1
-; STRICT-NEXT: andpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ugt_x:
@@ -546,7 +546,7 @@ define double @ult_x(double %x) {
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm1, %xmm1
; STRICT-NEXT: cmpnlesd %xmm0, %xmm1
-; STRICT-NEXT: andpd %xmm1, %xmm0
+; STRICT-NEXT: andps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ult_x:
@@ -564,10 +564,10 @@ define double @ugt_inverse_x(double %x)
; STRICT-LABEL: ugt_inverse_x:
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm2, %xmm2
-; STRICT-NEXT: movapd %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm0, %xmm1
; STRICT-NEXT: cmpnlesd %xmm2, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andnps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ugt_inverse_x:
@@ -593,8 +593,8 @@ define double @ult_inverse_x(double %x)
; STRICT: # BB#0:
; STRICT-NEXT: xorps %xmm1, %xmm1
; STRICT-NEXT: cmpnlesd %xmm0, %xmm1
-; STRICT-NEXT: andnpd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
+; STRICT-NEXT: andnps %xmm0, %xmm1
+; STRICT-NEXT: movaps %xmm1, %xmm0
; STRICT-NEXT: retq
;
; UNSAFE-LABEL: ult_inverse_x:
More information about the llvm-commits mailing list