[llvm] [X86] Fast AVX-512-VNNI vpdpwssd tuning (PR #85375)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 02:41:17 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 141145232f915b44aef6e3854f091da03c41a2b6 df56289a07dd121257df6b902dec16a0cb823ed8 -- llvm/lib/Target/X86/X86InstrInfo.cpp llvm/lib/Target/X86/X86TargetTransformInfo.h
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 43ae6fd590..bf3907a853 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10581,15 +10581,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
case X86::VPDPWSSDZ256m:
case X86::VPDPWSSDZr:
case X86::VPDPWSSDZm: {
- if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
+ if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
Patterns.push_back(MachineCombinerPattern::DPWSSD);
return true;
}
break;
}
}
- return TargetInstrInfo::getMachineCombinerPatterns(Root,
- Patterns, DoRegPressureReduce);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
static void
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 23035f6550..bdaf9b2f13 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -42,72 +42,43 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureX86_64,
// These features don't have any intrinsics or ABI effect.
- X86::FeatureNOPL,
- X86::FeatureCX16,
- X86::FeatureLAHFSAHF64,
+ X86::FeatureNOPL, X86::FeatureCX16, X86::FeatureLAHFSAHF64,
// Some older targets can be setup to fold unaligned loads.
X86::FeatureSSEUnalignedMem,
// Codegen control options.
- X86::TuningFast11ByteNOP,
- X86::TuningFast15ByteNOP,
- X86::TuningFastBEXTR,
- X86::TuningFastHorizontalOps,
- X86::TuningFastLZCNT,
- X86::TuningFastScalarFSQRT,
- X86::TuningFastSHLDRotate,
- X86::TuningFastScalarShiftMasks,
- X86::TuningFastVectorShiftMasks,
+ X86::TuningFast11ByteNOP, X86::TuningFast15ByteNOP, X86::TuningFastBEXTR,
+ X86::TuningFastHorizontalOps, X86::TuningFastLZCNT,
+ X86::TuningFastScalarFSQRT, X86::TuningFastSHLDRotate,
+ X86::TuningFastScalarShiftMasks, X86::TuningFastVectorShiftMasks,
X86::TuningFastVariableCrossLaneShuffle,
- X86::TuningFastVariablePerLaneShuffle,
- X86::TuningFastVectorFSQRT,
- X86::TuningLEAForSP,
- X86::TuningLEAUsesAG,
- X86::TuningLZCNTFalseDeps,
- X86::TuningBranchFusion,
- X86::TuningMacroFusion,
- X86::TuningPadShortFunctions,
- X86::TuningPOPCNTFalseDeps,
- X86::TuningMULCFalseDeps,
- X86::TuningPERMFalseDeps,
- X86::TuningRANGEFalseDeps,
- X86::TuningGETMANTFalseDeps,
- X86::TuningMULLQFalseDeps,
- X86::TuningSlow3OpsLEA,
- X86::TuningSlowDivide32,
- X86::TuningSlowDivide64,
- X86::TuningSlowIncDec,
- X86::TuningSlowLEA,
- X86::TuningSlowPMADDWD,
- X86::TuningSlowPMULLD,
- X86::TuningSlowSHLD,
- X86::TuningSlowTwoMemOps,
- X86::TuningSlowUAMem16,
- X86::TuningPreferMaskRegisters,
- X86::TuningInsertVZEROUPPER,
- X86::TuningUseSLMArithCosts,
- X86::TuningUseGLMDivSqrtCosts,
- X86::TuningNoDomainDelay,
- X86::TuningNoDomainDelayMov,
- X86::TuningNoDomainDelayShuffle,
- X86::TuningNoDomainDelayBlend,
- X86::TuningPreferShiftShuffle,
- X86::TuningFastImmVectorShift,
+ X86::TuningFastVariablePerLaneShuffle, X86::TuningFastVectorFSQRT,
+ X86::TuningLEAForSP, X86::TuningLEAUsesAG, X86::TuningLZCNTFalseDeps,
+ X86::TuningBranchFusion, X86::TuningMacroFusion,
+ X86::TuningPadShortFunctions, X86::TuningPOPCNTFalseDeps,
+ X86::TuningMULCFalseDeps, X86::TuningPERMFalseDeps,
+ X86::TuningRANGEFalseDeps, X86::TuningGETMANTFalseDeps,
+ X86::TuningMULLQFalseDeps, X86::TuningSlow3OpsLEA,
+ X86::TuningSlowDivide32, X86::TuningSlowDivide64, X86::TuningSlowIncDec,
+ X86::TuningSlowLEA, X86::TuningSlowPMADDWD, X86::TuningSlowPMULLD,
+ X86::TuningSlowSHLD, X86::TuningSlowTwoMemOps, X86::TuningSlowUAMem16,
+ X86::TuningPreferMaskRegisters, X86::TuningInsertVZEROUPPER,
+ X86::TuningUseSLMArithCosts, X86::TuningUseGLMDivSqrtCosts,
+ X86::TuningNoDomainDelay, X86::TuningNoDomainDelayMov,
+ X86::TuningNoDomainDelayShuffle, X86::TuningNoDomainDelayBlend,
+ X86::TuningPreferShiftShuffle, X86::TuningFastImmVectorShift,
X86::TuningFastDPWSSD,
// Perf-tuning flags.
- X86::TuningFastGather,
- X86::TuningSlowUAMem32,
+ X86::TuningFastGather, X86::TuningSlowUAMem32,
X86::TuningAllowLight256Bit,
// Based on whether user set the -mprefer-vector-width command line.
- X86::TuningPrefer128Bit,
- X86::TuningPrefer256Bit,
+ X86::TuningPrefer128Bit, X86::TuningPrefer256Bit,
// CPU name enums. These just follow CPU string.
- X86::ProcIntelAtom
- };
+ X86::ProcIntelAtom};
public:
explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
``````````
</details>
https://github.com/llvm/llvm-project/pull/85375
More information about the llvm-commits mailing list