[llvm] r277961 - [AVX-512] Add 512-bit logical operations to load folding tables. Add avx512f stack folding test and move some tests from the avx512vl test.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 7 10:14:09 PDT 2016
Author: ctopper
Date: Sun Aug 7 12:14:09 2016
New Revision: 277961
URL: http://llvm.org/viewvc/llvm-project?rev=277961&view=rev
Log:
[AVX-512] Add 512-bit logical operations to load folding tables. Add avx512f stack folding test and move some tests from the avx512vl test.
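For context on what these table entries mean: each row in the memory-folding tables pairs a register-register opcode with its register-memory form, so that the folding machinery can rewrite the instruction to read one operand directly from a stack slot instead of emitting a separate reload. The snippet below is a minimal, self-contained sketch of that idea; the struct name FoldTableEntry, the lookup helper, and the enum opcode values are hypothetical stand-ins, not LLVM's actual definitions. Only the { RegOp, MemOp, Flags } entry shape mirrors the rows added by this patch.

#include <cstdint>
#include <optional>

// Hypothetical, simplified model of a reg->mem folding table entry.
// In the patch, each row maps a register-register AVX-512 opcode to the
// register-memory form whose second source comes from memory.
struct FoldTableEntry {
  unsigned RegOp;  // e.g. X86::VANDPDZrr in the real tables
  unsigned MemOp;  // e.g. X86::VANDPDZrm
  uint16_t Flags;  // alignment/special-case flags; 0 for these entries
};

// Stand-in opcode numbers purely for illustration.
enum : unsigned { VANDPDZrr = 1, VANDPDZrm = 2, VPXORDZrr = 3, VPXORDZrm = 4 };

static const FoldTableEntry FoldTable2[] = {
    {VANDPDZrr, VANDPDZrm, 0},
    {VPXORDZrr, VPXORDZrm, 0},
};

// Sketch of the lookup: given a register-form opcode, return the memory-form
// opcode if folding a load into that operand is supported.
std::optional<unsigned> lookupFoldedLoadOpcode(unsigned RegOp) {
  for (const FoldTableEntry &E : FoldTable2)
    if (E.RegOp == RegOp)
      return E.MemOp;
  return std::nullopt;  // no entry: the reload cannot be folded
}

Without an entry, a spilled operand would typically be reloaded with a separate move before the logical operation; with one, the 64-byte reload is folded into the instruction itself, which is what the new avx512f stack-folding tests below check for via the "Folded Reload" comments.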
Added:
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
- copied, changed from r277960, llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=277961&r1=277960&r2=277961&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Aug 7 12:14:09 2016
@@ -1720,6 +1720,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 },
+ { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
+ { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
+ { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
+ { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
+ { X86::VORPDZrr, X86::VORPDZrm, 0 },
+ { X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
+ { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
+ { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
+ { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
+ { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
+ { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
+ { X86::VPORDZrr, X86::VPORDZrm, 0 },
+ { X86::VPORQZrr, X86::VPORQZrm, 0 },
+ { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
+ { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
{ X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
{ X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
Copied: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll (from r277960, llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll?p2=llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll&p1=llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll&r1=277960&r2=277961&rev=277961&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll Sun Aug 7 12:14:09 2016
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -8,36 +8,20 @@ target triple = "x86_64-unknown-unknown"
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
-define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_addpd
- ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_addpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addpd_zmm
+ ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd <2 x double> %a0, %a1
- ret <2 x double> %2
+ %2 = fadd <8 x double> %a0, %a1
+ ret <8 x double> %2
}
-define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_addpd_ymm
- ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_addps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addps_zmm
+ ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd <4 x double> %a0, %a1
- ret <4 x double> %2
-}
-
-define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_addps
- ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd <4 x float> %a0, %a1
- ret <4 x float> %2
-}
-
-define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_addps_ymm
- ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd <8 x float> %a0, %a1
- ret <8 x float> %2
+ %2 = fadd <16 x float> %a0, %a1
+ ret <16 x float> %2
}
define double @stack_fold_addsd(double %a0, double %a1) {
@@ -78,112 +62,58 @@ define <4 x float> @stack_fold_addss_int
ret <4 x float> %5
}
-define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_andnpd
- ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = bitcast <2 x double> %a0 to <2 x i64>
- %3 = bitcast <2 x double> %a1 to <2 x i64>
- %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
- %5 = and <2 x i64> %4, %3
- %6 = bitcast <2 x i64> %5 to <2 x double>
- ; fadd forces execution domain
- %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
- ret <2 x double> %7
-}
-
-define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_andnpd_ymm
- ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = bitcast <4 x double> %a0 to <4 x i64>
- %3 = bitcast <4 x double> %a1 to <4 x i64>
- %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
- %5 = and <4 x i64> %4, %3
- %6 = bitcast <4 x i64> %5 to <4 x double>
- ; fadd forces execution domain
- %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
- ret <4 x double> %7
-}
-
-define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_andnps
- ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = bitcast <4 x float> %a0 to <2 x i64>
- %3 = bitcast <4 x float> %a1 to <2 x i64>
- %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
- %5 = and <2 x i64> %4, %3
- %6 = bitcast <2 x i64> %5 to <4 x float>
- ; fadd forces execution domain
- %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
- ret <4 x float> %7
-}
-
-define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_andnps_ymm
- ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = bitcast <8 x float> %a0 to <4 x i64>
- %3 = bitcast <8 x float> %a1 to <4 x i64>
- %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
- %5 = and <4 x i64> %4, %3
- %6 = bitcast <4 x i64> %5 to <8 x float>
- ; fadd forces execution domain
- %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
- ret <8 x float> %7
-}
-
-define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_andpd
- ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <2 x double> %a0 to <2 x i64>
- %3 = bitcast <2 x double> %a1 to <2 x i64>
- %4 = and <2 x i64> %2, %3
- %5 = bitcast <2 x i64> %4 to <2 x double>
+define <8 x double> @stack_fold_andnpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andnpd_zmm
+ ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <8 x double> %a0 to <8 x i64>
+ %3 = bitcast <8 x double> %a1 to <8 x i64>
+ %4 = xor <8 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+ %5 = and <8 x i64> %4, %3
+ %6 = bitcast <8 x i64> %5 to <8 x double>
; fadd forces execution domain
- %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
- ret <2 x double> %6
+ %7 = fadd <8 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <8 x double> %7
}
-define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_andpd_ymm
- ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x double> %a0 to <4 x i64>
- %3 = bitcast <4 x double> %a1 to <4 x i64>
- %4 = and <4 x i64> %2, %3
- %5 = bitcast <4 x i64> %4 to <4 x double>
+define <16 x float> @stack_fold_andnps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andnps_zmm
+ ;CHECK: vpandnd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <16 x float> %a0 to <16 x i32>
+ %3 = bitcast <16 x float> %a1 to <16 x i32>
+ %4 = xor <16 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %5 = and <16 x i32> %4, %3
+ %6 = bitcast <16 x i32> %5 to <16 x float>
; fadd forces execution domain
- %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
- ret <4 x double> %6
+ %7 = fadd <16 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <16 x float> %7
}
-define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_andps
- ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x float> %a0 to <4 x i32>
- %3 = bitcast <4 x float> %a1 to <4 x i32>
- %4 = and <4 x i32> %2, %3
- %5 = bitcast <4 x i32> %4 to <4 x float>
+define <8 x double> @stack_fold_andpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andpd_zmm
+ ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <8 x double> %a0 to <8 x i64>
+ %3 = bitcast <8 x double> %a1 to <8 x i64>
+ %4 = and <8 x i64> %2, %3
+ %5 = bitcast <8 x i64> %4 to <8 x double>
; fadd forces execution domain
- %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
- ret <4 x float> %6
+ %6 = fadd <8 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <8 x double> %6
}
-define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_andps_ymm
- ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <8 x float> %a0 to <8 x i32>
- %3 = bitcast <8 x float> %a1 to <8 x i32>
- %4 = and <8 x i32> %2, %3
- %5 = bitcast <8 x i32> %4 to <8 x float>
+define <16 x float> @stack_fold_andps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andps_zmm
+ ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <16 x float> %a0 to <16 x i32>
+ %3 = bitcast <16 x float> %a1 to <16 x i32>
+ %4 = and <16 x i32> %2, %3
+ %5 = bitcast <16 x i32> %4 to <16 x float>
; fadd forces execution domain
- %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
- ret <8 x float> %6
+ %6 = fadd <16 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <16 x float> %6
}
define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
@@ -226,72 +156,72 @@ define <4 x float> @stack_fold_insertps(
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
-define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
- ;CHECK-LABEL: stack_fold_maxpd
- ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_maxpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxpd_zmm
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
- ret <2 x double> %2
+ %2 = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %2
}
-declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
- ;CHECK-LABEL: stack_fold_maxpd_commutable
- ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_maxpd_zmm_commutable(<8 x double> %a0, <8 x double> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxpd_zmm_commutable
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
- ret <2 x double> %2
+ %2 = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %2
}
-define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
- ;CHECK-LABEL: stack_fold_maxpd_ymm
- ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_maxps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxps_zmm
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
- ret <4 x double> %2
+ %2 = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %2
}
-declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
- ;CHECK-LABEL: stack_fold_maxpd_ymm_commutable
- ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_maxps_zmm_commutable(<16 x float> %a0, <16 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxps_zmm_commutable
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
- ret <4 x double> %2
+ %2 = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %2
}
-define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
- ;CHECK-LABEL: stack_fold_maxps
- ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_minpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_minpd_zmm
+ ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
- ret <4 x float> %2
+ %2 = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %2
}
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
- ;CHECK-LABEL: stack_fold_maxps_commutable
- ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_minpd_zmm_commutable(<8 x double> %a0, <8 x double> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_minpd_zmm_commutable
+ ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
- ret <4 x float> %2
+ %2 = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %2
}
-define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
- ;CHECK-LABEL: stack_fold_maxps_ymm
- ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_minps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_minps_zmm
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
- ret <8 x float> %2
+ %2 = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %2
}
-declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
- ;CHECK-LABEL: stack_fold_maxps_ymm_commutable
- ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_minps_zmm_commutable(<16 x float> %a0, <16 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_minps_zmm_commutable
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
- ret <8 x float> %2
+ %2 = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %2
}
define double @stack_fold_mulsd(double %a0, double %a1) {
@@ -332,88 +262,46 @@ define <4 x float> @stack_fold_mulss_int
ret <4 x float> %5
}
-define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_orpd
- ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <2 x double> %a0 to <2 x i64>
- %3 = bitcast <2 x double> %a1 to <2 x i64>
- %4 = or <2 x i64> %2, %3
- %5 = bitcast <2 x i64> %4 to <2 x double>
- ; fadd forces execution domain
- %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
- ret <2 x double> %6
-}
-
-define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_orpd_ymm
- ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x double> %a0 to <4 x i64>
- %3 = bitcast <4 x double> %a1 to <4 x i64>
- %4 = or <4 x i64> %2, %3
- %5 = bitcast <4 x i64> %4 to <4 x double>
- ; fadd forces execution domain
- %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
- ret <4 x double> %6
-}
-
-define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_orps
- ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x float> %a0 to <4 x i32>
- %3 = bitcast <4 x float> %a1 to <4 x i32>
- %4 = or <4 x i32> %2, %3
- %5 = bitcast <4 x i32> %4 to <4 x float>
- ; fadd forces execution domain
- %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
- ret <4 x float> %6
-}
-
-define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_orps_ymm
- ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <8 x float> %a0 to <8 x i32>
- %3 = bitcast <8 x float> %a1 to <8 x i32>
- %4 = or <8 x i32> %2, %3
- %5 = bitcast <8 x i32> %4 to <8 x float>
- ; fadd forces execution domain
- %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
- ret <8 x float> %6
-}
-
-define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_subpd
- ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_orpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_orpd_zmm
+ ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub <2 x double> %a0, %a1
- ret <2 x double> %2
+ %2 = bitcast <8 x double> %a0 to <8 x i64>
+ %3 = bitcast <8 x double> %a1 to <8 x i64>
+ %4 = or <8 x i64> %2, %3
+ %5 = bitcast <8 x i64> %4 to <8 x double>
+ ; fadd forces execution domain
+ %6 = fadd <8 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <8 x double> %6
}
-define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_subpd_ymm
- ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_orps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_orps_zmm
+ ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub <4 x double> %a0, %a1
- ret <4 x double> %2
+ %2 = bitcast <16 x float> %a0 to <16 x i32>
+ %3 = bitcast <16 x float> %a1 to <16 x i32>
+ %4 = or <16 x i32> %2, %3
+ %5 = bitcast <16 x i32> %4 to <16 x float>
+ ; fadd forces execution domain
+ %6 = fadd <16 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <16 x float> %6
}
-define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_subps
- ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+define <8 x double> @stack_fold_subpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subpd_zmm
+ ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub <4 x float> %a0, %a1
- ret <4 x float> %2
+ %2 = fsub <8 x double> %a0, %a1
+ ret <8 x double> %2
}
-define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_subps_ymm
- ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+define <16 x float> @stack_fold_subps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subps_zmm
+ ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub <8 x float> %a0, %a1
- ret <8 x float> %2
+ %2 = fsub <16 x float> %a0, %a1
+ ret <16 x float> %2
}
define double @stack_fold_subsd(double %a0, double %a1) {
@@ -454,56 +342,30 @@ define <4 x float> @stack_fold_subss_int
ret <4 x float> %5
}
-define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_xorpd
- ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <2 x double> %a0 to <2 x i64>
- %3 = bitcast <2 x double> %a1 to <2 x i64>
- %4 = xor <2 x i64> %2, %3
- %5 = bitcast <2 x i64> %4 to <2 x double>
- ; fadd forces execution domain
- %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
- ret <2 x double> %6
-}
-
-define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
- ;CHECK-LABEL: stack_fold_xorpd_ymm
- ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x double> %a0 to <4 x i64>
- %3 = bitcast <4 x double> %a1 to <4 x i64>
- %4 = xor <4 x i64> %2, %3
- %5 = bitcast <4 x i64> %4 to <4 x double>
- ; fadd forces execution domain
- %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
- ret <4 x double> %6
-}
-
-define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_xorps
- ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <4 x float> %a0 to <4 x i32>
- %3 = bitcast <4 x float> %a1 to <4 x i32>
- %4 = xor <4 x i32> %2, %3
- %5 = bitcast <4 x i32> %4 to <4 x float>
- ; fadd forces execution domain
- %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
- ret <4 x float> %6
-}
-
-define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
- ;CHECK-LABEL: stack_fold_xorps_ymm
- ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = bitcast <8 x float> %a0 to <8 x i32>
- %3 = bitcast <8 x float> %a1 to <8 x i32>
- %4 = xor <8 x i32> %2, %3
- %5 = bitcast <8 x i32> %4 to <8 x float>
+define <8 x double> @stack_fold_xorpd_zmm(<8 x double> %a0, <8 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_xorpd_zmm
+ ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <8 x double> %a0 to <8 x i64>
+ %3 = bitcast <8 x double> %a1 to <8 x i64>
+ %4 = xor <8 x i64> %2, %3
+ %5 = bitcast <8 x i64> %4 to <8 x double>
+ ; fadd forces execution domain
+ %6 = fadd <8 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <8 x double> %6
+}
+
+define <16 x float> @stack_fold_xorps_zmm(<16 x float> %a0, <16 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_xorps_zmm
+ ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = bitcast <16 x float> %a0 to <16 x i32>
+ %3 = bitcast <16 x float> %a1 to <16 x i32>
+ %4 = xor <16 x i32> %2, %3
+ %5 = bitcast <16 x i32> %4 to <16 x float>
; fadd forces execution domain
- %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
- ret <8 x float> %6
+ %6 = fadd <16 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <16 x float> %6
}
attributes #0 = { "unsafe-fp-math"="false" }
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll?rev=277961&r1=277960&r2=277961&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll Sun Aug 7 12:14:09 2016
@@ -40,44 +40,6 @@ define <8 x float> @stack_fold_addps_ymm
ret <8 x float> %2
}
-define double @stack_fold_addsd(double %a0, double %a1) {
- ;CHECK-LABEL: stack_fold_addsd
- ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd double %a0, %a1
- ret double %2
-}
-
-define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_addsd_int
- ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <2 x double> %a0, i32 0
- %3 = extractelement <2 x double> %a1, i32 0
- %4 = fadd double %2, %3
- %5 = insertelement <2 x double> %a0, double %4, i32 0
- ret <2 x double> %5
-}
-
-define float @stack_fold_addss(float %a0, float %a1) {
- ;CHECK-LABEL: stack_fold_addss
- ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fadd float %a0, %a1
- ret float %2
-}
-
-define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_addss_int
- ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <4 x float> %a0, i32 0
- %3 = extractelement <4 x float> %a1, i32 0
- %4 = fadd float %2, %3
- %5 = insertelement <4 x float> %a0, float %4, i32 0
- ret <4 x float> %5
-}
-
define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_andnpd
;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -186,46 +148,6 @@ define <8 x float> @stack_fold_andps_ymm
ret <8 x float> %6
}
-define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_divsd_int
- ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <2 x double> %a0, i32 0
- %3 = extractelement <2 x double> %a1, i32 0
- %4 = fdiv double %2, %3
- %5 = insertelement <2 x double> %a0, double %4, i32 0
- ret <2 x double> %5
-}
-
-define float @stack_fold_divss(float %a0, float %a1) {
- ;CHECK-LABEL: stack_fold_divss
- ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fdiv float %a0, %a1
- ret float %2
-}
-
-define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_divss_int
- ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <4 x float> %a0, i32 0
- %3 = extractelement <4 x float> %a1, i32 0
- %4 = fdiv float %2, %3
- %5 = insertelement <4 x float> %a0, float %4, i32 0
- ret <4 x float> %5
-}
-
-define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_insertps
- ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
- ret <4 x float> %2
-}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
-
define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
;CHECK-LABEL: stack_fold_maxpd
;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -294,42 +216,38 @@ define <8 x float> @stack_fold_maxps_ymm
ret <8 x float> %2
}
-define double @stack_fold_mulsd(double %a0, double %a1) {
- ;CHECK-LABEL: stack_fold_mulsd
- ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fmul double %a0, %a1
- ret double %2
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_minps
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
-define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_mulsd_int
- ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <2 x double> %a0, i32 0
- %3 = extractelement <2 x double> %a1, i32 0
- %4 = fmul double %2, %3
- %5 = insertelement <2 x double> %a0, double %4, i32 0
- ret <2 x double> %5
+define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_minps_commutable
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
}
-define float @stack_fold_mulss(float %a0, float %a1) {
- ;CHECK-LABEL: stack_fold_mulss
- ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fmul float %a0, %a1
- ret float %2
+define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_minps_ymm
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
}
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
-define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_mulss_int
- ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <4 x float> %a0, i32 0
- %3 = extractelement <4 x float> %a1, i32 0
- %4 = fmul float %2, %3
- %5 = insertelement <4 x float> %a0, float %4, i32 0
- ret <4 x float> %5
+define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_minps_ymm_commutable
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
}
define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
@@ -416,44 +334,6 @@ define <8 x float> @stack_fold_subps_ymm
ret <8 x float> %2
}
-define double @stack_fold_subsd(double %a0, double %a1) {
- ;CHECK-LABEL: stack_fold_subsd
- ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub double %a0, %a1
- ret double %2
-}
-
-define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
- ;CHECK-LABEL: stack_fold_subsd_int
- ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <2 x double> %a0, i32 0
- %3 = extractelement <2 x double> %a1, i32 0
- %4 = fsub double %2, %3
- %5 = insertelement <2 x double> %a0, double %4, i32 0
- ret <2 x double> %5
-}
-
-define float @stack_fold_subss(float %a0, float %a1) {
- ;CHECK-LABEL: stack_fold_subss
- ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = fsub float %a0, %a1
- ret float %2
-}
-
-define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
- ;CHECK-LABEL: stack_fold_subss_int
- ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
- %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = extractelement <4 x float> %a0, i32 0
- %3 = extractelement <4 x float> %a1, i32 0
- %4 = fsub float %2, %3
- %5 = insertelement <4 x float> %a0, float %4, i32 0
- ret <4 x float> %5
-}
-
define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload