[llvm] r312097 - [X86] Provide a separate feature bit for macro fusion support instead of basing it on the AVX flag
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 29 21:34:48 PDT 2017
Author: ctopper
Date: Tue Aug 29 21:34:48 2017
New Revision: 312097
URL: http://llvm.org/viewvc/llvm-project?rev=312097&view=rev
Log:
[X86] Provide a separate feature bit for macro fusion support instead of basing it on the AVX flag
Summary:
Currently we determine if macro fusion is supported based on the AVX flag as a proxy for the processor being "Sandy Bridge".
This is really strange as AMD now supports AVX. It also means that if the user explicitly disables AVX, we disable macro fusion.
This patch adds an explicit macro fusion feature. I've also enabled it for the generic 64-bit CPU (which doesn't have AVX).
This is probably another candidate for being in the MI layer, but for now I at least wanted to correct the overloading of the AVX feature.
Reviewers: spatel, chandlerc, RKSimon, zvi
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D37280
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
llvm/trunk/test/CodeGen/X86/avx-select.ll
llvm/trunk/test/CodeGen/X86/avx-splat.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Tue Aug 29 21:34:48 2017
@@ -288,6 +288,13 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Sandy Bridge and newer processors have many instructions that can be
+// fused with conditional branches and pass through the CPU as a single
+// operation.
+def FeatureMacroFusion
+ : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
+ "Various instructions can be fused with conditional branches">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -372,7 +379,8 @@ def : ProcessorModel<"core2", SandyBridg
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -382,7 +390,8 @@ def : ProcessorModel<"penryn", SandyBrid
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Atom CPUs.
@@ -468,7 +477,8 @@ class NehalemProc<string Name> : Process
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
@@ -485,7 +495,8 @@ class WestmereProc<string Name> : Proces
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : WestmereProc<"westmere">;
@@ -516,7 +527,8 @@ def SNBFeatures : ProcessorFeatures<[],
FeatureLAHFSAHF,
FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate
+ FeatureFastSHLDRotate,
+ FeatureMacroFusion
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@@ -731,7 +743,8 @@ def : Proc<"bdver1", [
FeatureXSAVE,
FeatureLWP,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Piledriver
def : Proc<"bdver2", [
@@ -755,7 +768,8 @@ def : Proc<"bdver2", [
FeatureLWP,
FeatureFMA,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Steamroller
@@ -782,7 +796,8 @@ def : Proc<"bdver3", [
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Excavator
@@ -810,7 +825,8 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
- FeatureMWAITX
+ FeatureMWAITX,
+ FeatureMacroFusion
]>;
// Znver1
@@ -830,6 +846,7 @@ def: ProcessorModel<"znver1", Znver1Mode
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureMacroFusion,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
@@ -873,7 +890,8 @@ def : ProcessorModel<"x86-64", SandyBrid
Feature64Bit,
FeatureSlow3OpsLEA,
FeatureSlowBTMem,
- FeatureSlowIncDec
+ FeatureSlowIncDec,
+ FeatureMacroFusion
]>;
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MacroFusion.cpp?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MacroFusion.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MacroFusion.cpp Tue Aug 29 21:34:48 2017
@@ -27,10 +27,8 @@ static bool shouldScheduleAdjacent(const
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
- // Check if this processor supports macro-fusion. Since this is a minor
- // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
- // proxy for SandyBridge+.
- if (!ST.hasAVX())
+ // Check if this processor supports macro-fusion.
+ if (!ST.hasMacroFusion())
return false;
enum {
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Aug 29 21:34:48 2017
@@ -347,6 +347,7 @@ void X86Subtarget::initializeEnvironment
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
+ HasMacroFusion = false;
HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Aug 29 21:34:48 2017
@@ -238,6 +238,9 @@ protected:
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
+ /// True if the processor supports macrofusion.
+ bool HasMacroFusion;
+
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB;
@@ -488,6 +491,7 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
Modified: llvm/trunk/test/CodeGen/X86/avx-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-select.ll?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-select.ll Tue Aug 29 21:34:48 2017
@@ -16,8 +16,8 @@ define <8 x i32> @select00(i32 %a, <8 x
;
; X64-LABEL: select00:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB0_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
@@ -44,8 +44,8 @@ define <4 x i64> @select01(i32 %a, <4 x
;
; X64-LABEL: select01:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB1_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
Modified: llvm/trunk/test/CodeGen/X86/avx-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-splat.ll?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-splat.ll Tue Aug 29 21:34:48 2017
@@ -60,8 +60,8 @@ define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: jne .LBB4_2
; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Tue Aug 29 21:34:48 2017
@@ -692,8 +692,8 @@ define <16 x i8> @test8(<16 x i32>%a, <1
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: cmpl %esi, %edi
+; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
@@ -708,8 +708,8 @@ define <16 x i8> @test8(<16 x i32>%a, <1
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: cmpl %esi, %edi
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Tue Aug 29 21:34:48 2017
@@ -1678,8 +1678,8 @@ define <4 x float> @uitofp_2i64_to_4f32(
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
@@ -1914,8 +1914,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll?rev=312097&r1=312096&r2=312097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll Tue Aug 29 21:34:48 2017
@@ -296,9 +296,9 @@ while.end:
; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
-; CHECK: cmpl [[a]], %eax
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
+; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]
More information about the llvm-commits
mailing list