[lld] 21a4710 - [ThinLTO] Pass CodeGenOpts like UnrollLoops/VectorizeLoop/VectorizeSLP

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 21:14:09 PST 2020


Author: Wei Mi
Date: 2020-01-09T21:13:11-08:00
New Revision: 21a4710c67a97838dd75cf60ed24da11280800f8

URL: https://github.com/llvm/llvm-project/commit/21a4710c67a97838dd75cf60ed24da11280800f8
DIFF: https://github.com/llvm/llvm-project/commit/21a4710c67a97838dd75cf60ed24da11280800f8.diff

LOG: [ThinLTO] Pass CodeGenOpts like UnrollLoops/VectorizeLoop/VectorizeSLP
down to pass builder in ltobackend.

Currently, CodeGenOpts such as UnrollLoops/VectorizeLoop/VectorizeSLP in clang
are not passed down to the pass builder in the LTO backend when the new pass
manager is used. This is inconsistent with the behavior when the new pass
manager is used without ThinLTO. Because of this inconsistency, the SLP
vectorization pass is currently not enabled in the LTO backend for
O3 + ThinLTO. This patch fixes that.

Differential Revision: https://reviews.llvm.org/D72386

Added: 
    clang/test/CodeGen/thinlto-slp-vectorize-pm.c
    lld/test/ELF/lto/slp-vectorize-pm.ll
    llvm/test/tools/gold/X86/slp-vectorize-pm.ll
    llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll

Modified: 
    clang/lib/CodeGen/BackendUtil.cpp
    lld/COFF/CMakeLists.txt
    lld/ELF/CMakeLists.txt
    lld/ELF/LTO.cpp
    lld/wasm/CMakeLists.txt
    llvm/include/llvm/LTO/Config.h
    llvm/lib/LTO/LTOBackend.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/test/Other/new-pm-defaults.ll
    llvm/test/Other/new-pm-thinlto-defaults.ll
    llvm/tools/gold/gold-plugin.cpp
    llvm/tools/llvm-lto2/CMakeLists.txt
    llvm/tools/llvm-lto2/llvm-lto2.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ed881f2ddf68..d58bcf5a7905 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1437,6 +1437,12 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
   Conf.OptLevel = CGOpts.OptimizationLevel;
   initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
   Conf.SampleProfile = std::move(SampleProfile);
+  Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
+  // For historical reasons, loop interleaving is set to mirror setting for loop
+  // unrolling.
+  Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
+  Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
+  Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
 
   // Context sensitive profile.
   if (CGOpts.hasProfileCSIRInstr()) {

diff  --git a/clang/test/CodeGen/thinlto-slp-vectorize-pm.c b/clang/test/CodeGen/thinlto-slp-vectorize-pm.c
new file mode 100644
index 000000000000..4700da6ed0ef
--- /dev/null
+++ b/clang/test/CodeGen/thinlto-slp-vectorize-pm.c
@@ -0,0 +1,50 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: llvm-lto -thinlto -o %t %t.o
+
+// Test to ensure the slp vectorize codegen option is passed down to the
+// ThinLTO backend. -vectorize-slp is a cc1 option and will be added
+// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp
+// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization
+// currently. "-mllvm -vectorize-slp=false" is added here in the test to
+// ensure the slp vectorization is executed because the -vectorize-slp cc1
+// flag is passed down, not because "-mllvm -vectorize-slp" is enabled
+// by default.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP
+// O2-SLP: Running pass: SLPVectorizerPass
+// O0-SLP-NOT: Running pass: SLPVectorizerPass
+
+// Test to ensure the loop vectorize codegen option is passed down to the
+// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
+// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is
+// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization
+// currently. "-mllvm -vectorize-loops=false" is added here in the test to
+// ensure the loop vectorization is executed because the -vectorize-loops cc1
+// flag is passed down, not because "-mllvm -vectorize-loops" is enabled
+// by default.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
+// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+// Test to ensure the loop interleave codegen option is passed down to the
+// ThinLTO backend. The internal loop interleave codegen option will be
+// enabled automatically when O2/O3 is available for clang. Once the loop
+// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable
+// the interleave currently. "-mllvm -interleave-loops=false" is added here
+// in the test to ensure the loop interleave is executed because the interleave
+// codegen flag is passed down, not because "-mllvm -interleave-loops" is
+// enabled by default.
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
+// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
+// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+
+void foo(double *a) {
+  for (int i = 0; i < 1000; i++)
+    a[i] = 10;
+}

diff  --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt
index 7c5e8b79b7fe..1d310f8419f5 100644
--- a/lld/COFF/CMakeLists.txt
+++ b/lld/COFF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_lld_library(lldCOFF
   MC
   Object
   Option
+  Passes
   Support
   WindowsManifest
 

diff  --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 7531a94b86e2..b89f4436288a 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -57,6 +57,7 @@ add_lld_library(lldELF
   MC
   Object
   Option
+  Passes
   Support
 
   LINK_LIBS

diff  --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 524d552b0b84..2148ac500291 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -93,6 +93,9 @@ static lto::Config createConfig() {
   c.MAttrs = getMAttrs();
   c.CGOptLevel = args::getCGOptLevel(config->ltoo);
 
+  c.PTO.LoopVectorization = c.OptLevel > 1;
+  c.PTO.SLPVectorization = c.OptLevel > 1;
+
   // Set up a custom pipeline if we've been asked to.
   c.OptPipeline = config->ltoNewPmPasses;
   c.AAPipeline = config->ltoAAPipeline;

diff  --git a/lld/test/ELF/lto/slp-vectorize-pm.ll b/lld/test/ELF/lto/slp-vectorize-pm.ll
new file mode 100644
index 000000000000..ca8e4365680a
--- /dev/null
+++ b/lld/test/ELF/lto/slp-vectorize-pm.ll
@@ -0,0 +1,48 @@
+; REQUIRES: x86
+; RUN: opt -module-summary %s -o %t.o
+
+; Test that SLP and loop vectorization are enabled by default at O2 and O3.
+; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O0 --plugin-opt=save-temps -shared -o %t1.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
+
+; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O1 --plugin-opt=save-temps -shared -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
+
+; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O2 --plugin-opt=save-temps -shared -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
+
+; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O3 --plugin-opt=save-temps -shared -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
+
+; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O2-SLP: Running pass: SLPVectorizerPass
+; CHECK-O3-SLP: Running pass: SLPVectorizerPass
+; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret i32 %add
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable", i1 true}

diff  --git a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt
index 9e6075e1f7f1..d2ba862c1e4a 100644
--- a/lld/wasm/CMakeLists.txt
+++ b/lld/wasm/CMakeLists.txt
@@ -29,6 +29,7 @@ add_lld_library(lldWasm
   MC
   Object
   Option
+  Passes
   Support
 
   LINK_LIBS

diff  --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index d5eae4df6537..50147300f7f7 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -18,6 +18,7 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Target/TargetOptions.h"
 
@@ -128,6 +129,9 @@ struct Config {
   /// with llvm-lto2.
   std::unique_ptr<raw_ostream> ResolutionFile;
 
+  /// Tunable parameters for passes in the default pipelines.
+  PipelineTuningOptions PTO;
+
   /// The following callbacks deal with tasks, which normally represent the
   /// entire optimization and code generation pipeline for what will become a
   /// single native object file. Each task has a unique identifier between 0 and

diff  --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 4c5302d15f0c..ef40d24b2a91 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -172,7 +172,7 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
   PassInstrumentationCallbacks PIC;
   StandardInstrumentations SI;
   SI.registerCallbacks(PIC);
-  PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC);
+  PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
   AAManager AA;
 
   // Parse a custom AA pipeline if asked to.

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 646eb7d26cbd..953b688c7f8e 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1902,6 +1902,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
       return Error::success();
     }
 
+    // This is consistent with old pass manager invoked via opt, but
+    // inconsistent with clang. Clang doesn't enable loop vectorization
+    // but does enable slp vectorization at Oz.
+    PTO.LoopVectorization = L > O1 && L < Oz;
+    PTO.SLPVectorization = L > O1 && L < Oz;
+
     if (Matches[1] == "default") {
       MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
     } else if (Matches[1] == "thinlto-pre-link") {

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index bece4d46e976..1dc96ef3a14c 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -251,6 +251,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O2-NEXT: Running pass: SLPVectorizerPass
+; CHECK-O3-NEXT: Running pass: SLPVectorizerPass
+; CHECK-Os-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass

diff  --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index a071f243dbcf..48d59dd6aa77 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -221,6 +221,9 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-POSTLINK-O2-NEXT: Running pass: SLPVectorizerPass
+; CHECK-POSTLINK-O3-NEXT: Running pass: SLPVectorizerPass
+; CHECK-POSTLINK-Os-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass

diff  --git a/llvm/test/tools/gold/X86/slp-vectorize-pm.ll b/llvm/test/tools/gold/X86/slp-vectorize-pm.ll
new file mode 100644
index 000000000000..26d11fe8a146
--- /dev/null
+++ b/llvm/test/tools/gold/X86/slp-vectorize-pm.ll
@@ -0,0 +1,79 @@
+; RUN: opt -module-summary %s -o %t.o
+
+; Test that SLP and loop vectorization are enabled by default at O2 and O3.
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     --plugin-opt=new-pass-manager \
+; RUN:     --plugin-opt=debug-pass-manager \
+; RUN:     --plugin-opt=cache-dir=%t.cache \
+; RUN:     --plugin-opt=O0 \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -shared \
+; RUN:     -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
+
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     --plugin-opt=new-pass-manager \
+; RUN:     --plugin-opt=debug-pass-manager \
+; RUN:     --plugin-opt=cache-dir=%t.cache \
+; RUN:     --plugin-opt=O1 \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -shared \
+; RUN:     -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
+
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     --plugin-opt=new-pass-manager \
+; RUN:     --plugin-opt=debug-pass-manager \
+; RUN:     --plugin-opt=cache-dir=%t.cache \
+; RUN:     --plugin-opt=O2 \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -shared \
+; RUN:     -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
+
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     --plugin-opt=new-pass-manager \
+; RUN:     --plugin-opt=debug-pass-manager \
+; RUN:     --plugin-opt=cache-dir=%t.cache \
+; RUN:     --plugin-opt=O3 \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -shared \
+; RUN:     -o %t5.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
+; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
+
+; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O2-SLP: Running pass: SLPVectorizerPass
+; CHECK-O3-SLP: Running pass: SLPVectorizerPass
+; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret i32 %add
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable", i1 true}

diff  --git a/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll b/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
new file mode 100644
index 000000000000..2d9ef9d1bfc9
--- /dev/null
+++ b/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
@@ -0,0 +1,51 @@
+; RUN: opt -module-summary %s -o %t1.bc
+
+; Test that SLP and loop vectorization are enabled by default at O2 and O3.
+; RUN: llvm-lto2 run %t1.bc -o %t2.o -O0 -r %t1.bc,foo,plx -debug-pass-manager \
+; RUN:  -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
+; RUN: llvm-dis %t2.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
+
+; RUN: llvm-lto2 run %t1.bc -o %t3.o -O1 -r %t1.bc,foo,plx -debug-pass-manager \
+; RUN:  -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
+; RUN: llvm-dis %t3.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
+
+; RUN: llvm-lto2 run %t1.bc -o %t4.o -O2 -r %t1.bc,foo,plx -debug-pass-manager \
+; RUN:  -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
+; RUN: llvm-dis %t4.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
+
+; RUN: llvm-lto2 run %t1.bc -o %t5.o -O3 -r %t1.bc,foo,plx -debug-pass-manager \
+; RUN:  -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
+; RUN: llvm-dis %t5.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
+
+; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
+; CHECK-O2-SLP: Running pass: SLPVectorizerPass
+; CHECK-O3-SLP: Running pass: SLPVectorizerPass
+; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret i32 %add
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable", i1 true}

diff  --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
index d355184f866c..406079dad307 100644
--- a/llvm/tools/gold/gold-plugin.cpp
+++ b/llvm/tools/gold/gold-plugin.cpp
@@ -860,6 +860,9 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
   Conf.CGOptLevel = getCGOptLevel();
   Conf.DisableVerify = options::DisableVerify;
   Conf.OptLevel = options::OptLevel;
+  Conf.PTO.LoopVectorization = options::OptLevel > 1;
+  Conf.PTO.SLPVectorization = options::OptLevel > 1;
+
   if (options::Parallelism)
     Backend = createInProcessThinBackend(options::Parallelism);
   if (options::thinlto_index_only) {

diff  --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 7f2db01c9c91..fa2d8624fd94 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS
   LTO
   MC
   Object
+  Passes
   Support
   Target
   )

diff  --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 5e3b3dcb6c31..67a677dd45fb 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -270,6 +270,8 @@ static int run(int argc, char **argv) {
   Conf.OverrideTriple = OverrideTriple;
   Conf.DefaultTriple = DefaultTriple;
   Conf.StatsFile = StatsFile;
+  Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
+  Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)


        


More information about the llvm-commits mailing list