[llvm] r338821 - [X86] Support fp128 and/or/xor/load/store with VEX and EVEX encoded instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 2 23:12:57 PDT 2018
Author: ctopper
Date: Thu Aug 2 23:12:56 2018
New Revision: 338821
URL: http://llvm.org/viewvc/llvm-project?rev=338821&view=rev
Log:
[X86] Support fp128 and/or/xor/load/store with VEX and EVEX encoded instructions.
Move all the patterns to X86InstrVecCompiler.td so we can keep SSE/AVX/AVX512 all in one place.
To save some patterns we'll use an existing DAG combine to convert f128 fand/for/fxor to integer when sse2 is enabled. This allows us to reuse all the existing patterns for v2i64.
I believe this now makes SHA instructions the only case where VEX/EVEX and legacy encoded instructions could be generated simultaneously.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
llvm/trunk/test/CodeGen/X86/fp128-i128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=338821&r1=338820&r2=338821&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 2 23:12:56 2018
@@ -613,7 +613,8 @@ X86TargetLowering::X86TargetLowering(con
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
- addRegisterClass(MVT::f128, &X86::VR128RegClass);
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
@@ -36981,7 +36982,7 @@ static SDValue lowerX86FPLogicOp(SDNode
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
- if (VT.isVector() && Subtarget.hasSSE2()) {
+ if ((VT.isVector() || VT == MVT::f128) && Subtarget.hasSSE2()) {
SDLoc dl(N);
MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=338821&r1=338820&r2=338821&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Aug 2 23:12:56 2018
@@ -8132,51 +8132,6 @@ let Predicates = [UseAVX2] in {
}
//===----------------------------------------------------------------------===//
-// Extra selection patterns for f128, f128mem
-
-// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-def : Pat<(store (f128 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-
-def : Pat<(alignedloadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVAPSrm addr:$src), VR128)>;
-def : Pat<(loadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVUPSrm addr:$src), VR128)>;
-
-// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ANDPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ANDPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (XORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (XORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td?rev=338821&r1=338820&r2=338821&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td Thu Aug 2 23:12:56 2018
@@ -49,6 +49,19 @@ def : Pat<(v2f64 (bitconvert (v8i16 VR12
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
+
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
@@ -509,3 +522,68 @@ let Predicates = [HasBWI, HasVLX] in {
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+let Predicates = [NoAVX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (MOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (MOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+def : Pat<(store (f128 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSZ128rm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSZ128rm addr:$src)>;
+}
+
+// With SSE2 the DAG combiner converts fp logic ops to integer logic ops to
+// reduce patterns.
+let Predicates = [UseSSE1] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
+ (ANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
+ (ORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
+ (XORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+}
+
+
Modified: llvm/trunk/test/CodeGen/X86/fp128-i128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-i128.ll?rev=338821&r1=338820&r2=338821&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-i128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-i128.ll Thu Aug 2 23:12:56 2018
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; These tests were generated from simplified libm C code.
; When compiled for the x86_64-linux-android target,
@@ -42,19 +46,33 @@
; foo(w);
; }
define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
-; CHECK-LABEL: TestUnionLD1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
-; CHECK-NEXT: andq %rdi, %rcx
-; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
-; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: jmp foo # TAILCALL
+; SSE-LABEL: TestUnionLD1:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
+; SSE-NEXT: andq %rdi, %rcx
+; SSE-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
+; SSE-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: jmp foo # TAILCALL
+;
+; AVX-LABEL: TestUnionLD1:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
+; AVX-NEXT: andq %rdi, %rcx
+; AVX-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
+; AVX-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: orq %rcx, %rdx
+; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT: jmp foo # TAILCALL
entry:
%0 = bitcast fp128 %s to i128
%1 = zext i64 %n to i128
@@ -77,14 +95,23 @@ entry:
; return w;
; }
define fp128 @TestUnionLD2(fp128 %s) #0 {
-; CHECK-LABEL: TestUnionLD2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: TestUnionLD2:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestUnionLD2:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %s to i128
%bf.clear = and i128 %0, -18446744073709551616
@@ -101,25 +128,45 @@ entry:
; return (z.e < 0.1L) ? 1.0L : 2.0L;
; }
define fp128 @TestI128_1(fp128 %x) #0 {
-; CHECK-LABEL: TestI128_1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rcx, (%rsp)
-; CHECK-NEXT: movaps (%rsp), %xmm0
-; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: callq __lttf2
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: sets %cl
-; CHECK-NEXT: shlq $4, %rcx
-; CHECK-NEXT: movaps {{\.LCPI.*}}(%rcx), %xmm0
-; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: retq
+; SSE-LABEL: TestI128_1:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT: andq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rcx, (%rsp)
+; SSE-NEXT: movaps (%rsp), %xmm0
+; SSE-NEXT: movaps {{.*}}(%rip), %xmm1
+; SSE-NEXT: callq __lttf2
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: testl %eax, %eax
+; SSE-NEXT: sets %cl
+; SSE-NEXT: shlq $4, %rcx
+; SSE-NEXT: movaps {{\.LCPI.*}}(%rcx), %xmm0
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestI128_1:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT: andq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq %rcx, (%rsp)
+; AVX-NEXT: vmovaps (%rsp), %xmm0
+; AVX-NEXT: vmovaps {{.*}}(%rip), %xmm1
+; AVX-NEXT: callq __lttf2
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: testl %eax, %eax
+; AVX-NEXT: sets %cl
+; AVX-NEXT: shlq $4, %rcx
+; AVX-NEXT: vmovaps {{\.LCPI.*}}(%rcx), %xmm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%bf.clear = and i128 %0, 170141183460469231731687303715884105727
@@ -139,15 +186,25 @@ entry:
; return (hx & 0x8000) == 0 ? x : y;
; }
define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
-; CHECK-LABEL: TestI128_2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: jns .LBB3_2
-; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: .LBB3_2: # %entry
-; CHECK-NEXT: retq
+; SSE-LABEL: TestI128_2:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: jns .LBB3_2
+; SSE-NEXT: # %bb.1: # %entry
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: .LBB3_2: # %entry
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestI128_2:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: jns .LBB3_2
+; AVX-NEXT: # %bb.1: # %entry
+; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: .LBB3_2: # %entry
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%cmp = icmp sgt i128 %0, -1
@@ -167,32 +224,59 @@ entry:
; return (u.e);
; }
define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 {
-; CHECK-LABEL: TestI128_3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $56, %rsp
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
-; CHECK-NEXT: testq %rcx, %rax
-; CHECK-NEXT: je .LBB4_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: jmp .LBB4_3
-; CHECK-NEXT: .LBB4_2: # %if.then
-; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: callq __multf3
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
-; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT: movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
-; CHECK-NEXT: orq %rdx, %rax
-; CHECK-NEXT: .LBB4_3: # %if.end
-; CHECK-NEXT: movq %rcx, (%rsp)
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps (%rsp), %xmm0
-; CHECK-NEXT: addq $56, %rsp
-; CHECK-NEXT: retq
+; SSE-LABEL: TestI128_3:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $56, %rsp
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
+; SSE-NEXT: testq %rcx, %rax
+; SSE-NEXT: je .LBB4_2
+; SSE-NEXT: # %bb.1:
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: jmp .LBB4_3
+; SSE-NEXT: .LBB4_2: # %if.then
+; SSE-NEXT: movaps {{.*}}(%rip), %xmm1
+; SSE-NEXT: callq __multf3
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
+; SSE-NEXT: andq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: .LBB4_3: # %if.end
+; SSE-NEXT: movq %rcx, (%rsp)
+; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps (%rsp), %xmm0
+; SSE-NEXT: addq $56, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestI128_3:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $56, %rsp
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
+; AVX-NEXT: testq %rcx, %rax
+; AVX-NEXT: je .LBB4_2
+; AVX-NEXT: # %bb.1:
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT: jmp .LBB4_3
+; AVX-NEXT: .LBB4_2: # %if.then
+; AVX-NEXT: vmovaps {{.*}}(%rip), %xmm1
+; AVX-NEXT: callq __multf3
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT: movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
+; AVX-NEXT: andq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
+; AVX-NEXT: orq %rdx, %rax
+; AVX-NEXT: .LBB4_3: # %if.end
+; AVX-NEXT: movq %rcx, (%rsp)
+; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovaps (%rsp), %xmm0
+; AVX-NEXT: addq $56, %rsp
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%bf.cast = and i128 %0, 170135991163610696904058773219554885632
@@ -223,18 +307,31 @@ if.end:
; return x + df;
; }
define fp128 @TestI128_4(fp128 %x) #0 {
-; CHECK-LABEL: TestI128_4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: movaps (%rsp), %xmm0
-; CHECK-NEXT: callq __addtf3
-; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: retq
+; SSE-LABEL: TestI128_4:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, (%rsp)
+; SSE-NEXT: movaps (%rsp), %xmm0
+; SSE-NEXT: callq __addtf3
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestI128_4:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm0, %xmm1
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq $0, (%rsp)
+; AVX-NEXT: vmovaps (%rsp), %xmm0
+; AVX-NEXT: callq __addtf3
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%bf.clear = and i128 %0, -18446744073709551616
@@ -271,18 +368,31 @@ entry:
}
define fp128 @acosl(fp128 %x) #0 {
-; CHECK-LABEL: acosl:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: movaps (%rsp), %xmm0
-; CHECK-NEXT: callq __addtf3
-; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: retq
+; SSE-LABEL: acosl:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, (%rsp)
+; SSE-NEXT: movaps (%rsp), %xmm0
+; SSE-NEXT: callq __addtf3
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: acosl:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm0, %xmm1
+; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; AVX-NEXT: movq $0, (%rsp)
+; AVX-NEXT: vmovaps (%rsp), %xmm0
+; AVX-NEXT: callq __addtf3
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%bf.clear = and i128 %0, -18446744073709551616
@@ -293,15 +403,25 @@ entry:
; Compare i128 values and check i128 constants.
define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
-; CHECK-LABEL: TestComp:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: jns .LBB8_2
-; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: .LBB8_2: # %entry
-; CHECK-NEXT: retq
+; SSE-LABEL: TestComp:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: jns .LBB8_2
+; SSE-NEXT: # %bb.1: # %entry
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: .LBB8_2: # %entry
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestComp:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: cmpq $0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: jns .LBB8_2
+; AVX-NEXT: # %bb.1: # %entry
+; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: .LBB8_2: # %entry
+; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
%cmp = icmp sgt i128 %0, -1
@@ -313,10 +433,15 @@ declare void @foo(fp128) #1
; Test logical operations on fp128 values.
define fp128 @TestFABS_LD(fp128 %x) #0 {
-; CHECK-LABEL: TestFABS_LD:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: TestFABS_LD:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestFABS_LD:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%call = tail call fp128 @fabsl(fp128 %x) #2
ret fp128 %call
@@ -328,43 +453,79 @@ declare fp128 @copysignl(fp128, fp128) #
; Test more complicated logical operations generated from copysignl.
define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 {
-; CHECK-LABEL: TestCopySign:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: callq __gttf2
-; CHECK-NEXT: movl %eax, %ebp
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: callq __subtf3
-; CHECK-NEXT: testl %ebp, %ebp
-; CHECK-NEXT: jle .LBB10_1
-; CHECK-NEXT: # %bb.2: # %if.then
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movaps %xmm1, %xmm2
-; CHECK-NEXT: jmp .LBB10_3
-; CHECK-NEXT: .LBB10_1:
-; CHECK-NEXT: movaps (%rsp), %xmm2 # 16-byte Reload
-; CHECK-NEXT: .LBB10_3: # %cleanup
-; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: orps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm2, (%rbx)
-; CHECK-NEXT: movaps %xmm0, 16(%rbx)
-; CHECK-NEXT: movq %rbx, %rax
-; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: retq
+; SSE-LABEL: TestCopySign:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movq %rdi, %rbx
+; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: callq __gttf2
+; SSE-NEXT: movl %eax, %ebp
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: callq __subtf3
+; SSE-NEXT: testl %ebp, %ebp
+; SSE-NEXT: jle .LBB10_1
+; SSE-NEXT: # %bb.2: # %if.then
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: andps {{.*}}(%rip), %xmm1
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: jmp .LBB10_3
+; SSE-NEXT: .LBB10_1:
+; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; SSE-NEXT: .LBB10_3: # %cleanup
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; SSE-NEXT: andps {{.*}}(%rip), %xmm2
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm1, (%rbx)
+; SSE-NEXT: movaps %xmm0, 16(%rbx)
+; SSE-NEXT: movq %rbx, %rax
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: TestCopySign:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rbp
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm1
+; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: callq __gttf2
+; AVX-NEXT: movl %eax, %ebp
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT: vmovaps %xmm0, %xmm1
+; AVX-NEXT: callq __subtf3
+; AVX-NEXT: testl %ebp, %ebp
+; AVX-NEXT: jle .LBB10_1
+; AVX-NEXT: # %bb.2: # %if.then
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX-NEXT: vmovaps %xmm1, %xmm2
+; AVX-NEXT: jmp .LBB10_3
+; AVX-NEXT: .LBB10_1:
+; AVX-NEXT: vmovaps (%rsp), %xmm2 # 16-byte Reload
+; AVX-NEXT: .LBB10_3: # %cleanup
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovaps %xmm2, (%rbx)
+; AVX-NEXT: vmovaps %xmm0, 16(%rbx)
+; AVX-NEXT: movq %rbx, %rax
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: popq %rbp
+; AVX-NEXT: retq
entry:
%z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0
%z.real = load fp128, fp128* %z.realp, align 16
More information about the llvm-commits
mailing list