[llvm] r303084 - CodeGen: BlockPlacement: Increase tail duplication size for O3.
Vitaly Buka via llvm-commits
llvm-commits at lists.llvm.org
Mon May 22 13:56:41 PDT 2017
It started in the build range that includes the patch:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/1355
And I can locally reproduce it.
On Mon, May 22, 2017 at 1:54 PM, Vitaly Buka <vitalybuka at google.com> wrote:
> This is also caused by the patch:
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/1393/steps/check-llvm%20check-clang%20stage3%2Fmsan/logs/stdio
>
> FAILED: tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o
>
> /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang++ -DGTEST_HAS_RTTI=0 -DLLVM_BUILD_GLOBAL_ISEL -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Itools/llvm-xray -I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray -Iinclude -I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/include -fPIC -fvisibility-inlines-hidden -Werror=date-time -std=c++11 -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wcovered-switch-default -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wstring-conversion -fcolor-diagnostics -ffunction-sections -fdata-sections -O3 -UNDEBUG -fno-exceptions -fno-rtti -MD -MT tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -MF tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o.d -o tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -c /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray/xray-color-helper.cc
> ==7293==WARNING: MemorySanitizer: use-of-uninitialized-value
> #0 0x2f6e6c8 in llvm::X86InstrInfo::AnalyzeBranchImpl(llvm::MachineBasicBlock&, llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&, llvm::SmallVectorImpl<llvm::MachineOperand>&, llvm::SmallVectorImpl<llvm::MachineInstr*>&, bool) const /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6093:22
> #1 0x2f6e7ad in llvm::X86InstrInfo::analyzeBranch(llvm::MachineBasicBlock&, llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&, llvm::SmallVectorImpl<llvm::MachineOperand>&, bool) const /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6133:10
> #2 0x3d390dc in llvm::TailDuplicator::tailDuplicate(bool, llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, llvm::SmallVectorImpl<llvm::MachineBasicBlock*>&, llvm::SmallVectorImpl<llvm::MachineInstr*>&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:837:10
> #3 0x3d35e4a in llvm::TailDuplicator::tailDuplicateAndUpdate(bool, llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, llvm::SmallVectorImpl<llvm::MachineBasicBlock*>*, llvm::function_ref<void (llvm::MachineBasicBlock*)>*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:148:8
> #4 0x3f2451b in (anonymous namespace)::MachineBlockPlacement::maybeTailDuplicateBlock(llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, (anonymous namespace)::BlockChain&, llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*, llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::MachineBasicBlock, true, false, void>, false, false>&, bool&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2620:11
> #5 0x3f0a25d in repeatedlyTailDuplicateBlock /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2501:13
> #6 0x3f0a25d in (anonymous namespace)::MachineBlockPlacement::buildChain(llvm::MachineBasicBlock const*, (anonymous namespace)::BlockChain&, llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:1699
> #7 0x3ef1c70 in (anonymous namespace)::MachineBlockPlacement::buildCFGChains() /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2278:3
> #8 0x3eec7a9 in (anonymous namespace)::MachineBlockPlacement::runOnMachineFunction(llvm::MachineFunction&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2696:3
> #9 0x39a8ffc in llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineFunctionPass.cpp:62:13
> #10 0x431de6e in llvm::FPPassManager::runOnFunction(llvm::Function&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1519:27
> #11 0x431e534 in llvm::FPPassManager::runOnModule(llvm::Module&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1540:16
> #12 0x431f853 in runOnModule /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1596:27
> #13 0x431f853 in llvm::legacy::PassManagerImpl::run(llvm::Module&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1699
> #14 0x577960e in EmitAssembly /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:792:19
> #15 0x577960e in clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout const&, llvm::Module*, clang::BackendAction, std::__1::unique_ptr<llvm::raw_pwrite_stream, std::__1::default_delete<llvm::raw_pwrite_stream> >) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:1101
> #16 0x6f1b90e in clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp:258:7
> #17 0x7ec96b6 in clang::ParseAST(clang::Sema&, bool, bool) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Parse/ParseAST.cpp:159:13
> #18 0x64339ba in clang::FrontendAction::Execute() /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/FrontendAction.cpp:837:8
> #19 0x637572c in clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp:970:11
> #20 0x65fedd5 in clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:249:25
> #21 0x977555 in cc1_main(llvm::ArrayRef<char const*>, char const*, void*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/cc1_main.cpp:221:13
> #22 0x971417 in ExecuteCC1Tool /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:299:12
> #23 0x971417 in main /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:380
> #24 0x7fee6047d82f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
> #25 0x8f1468 in _start (/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang-5.0+0x8f1468)
>
>
>
> On Mon, May 15, 2017 at 7:20 PM, Vitaly Buka <vitalybuka at google.com>
> wrote:
>
>> Thanks, I've updated the test in r303136
>>
>> On Mon, May 15, 2017 at 7:14 PM Dmitry Vyukov <dvyukov at google.com> wrote:
>>
>>> I've benchmarked this change on the tsan benchmark:
>>> projects/compiler-rt/lib/tsan/tests/rtl/TsanRtlTest
>>> --gtest_also_run_disabled_tests --gtest_filter=DISABLED_BENCH.Mop*
>>> (some numbers below)
>>>
>>> So let's just update check_analyze for the new reality.
>>>
>>>
>>> $ egrep "OK.*Mop1Read" /tmp/old
>>> [ OK ] DISABLED_BENCH.Mop1Read (5317 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5907 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5356 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5376 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5339 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5226 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5261 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5464 ms)
>>> $ egrep "OK.*Mop1Read" /tmp/new
>>> [ OK ] DISABLED_BENCH.Mop1Read (5324 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5238 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5344 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5327 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5373 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5208 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5332 ms)
>>> [ OK ] DISABLED_BENCH.Mop1Read (5351 ms)
>>>
>>>
>>> $ egrep "OK.*Mop8Write" /tmp/old
>>> [ OK ] DISABLED_BENCH.Mop8Write (1624 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1623 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1575 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1619 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1585 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1586 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1627 ms)
>>> $ egrep "OK.*Mop8Write" /tmp/new
>>> [ OK ] DISABLED_BENCH.Mop8Write (1760 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1634 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1630 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1632 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1601 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1599 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1589 ms)
>>> [ OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>>>
>>>
>>>
>>> On Mon, May 15, 2017 at 5:10 PM, Vitaly Buka <vitalybuka at google.com>
>>> wrote:
>>> > +Dmitry Vyukov
>>> >
>>> > On Mon, May 15, 2017 at 5:09 PM Vitaly Buka <vitalybuka at google.com>
>>> wrote:
>>> >>
>>> >> This test is broken after the patch:
>>> >> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-autoconf/builds/8452/steps/tsan%20analyze/logs/stdio
>>> >>
>>> >> On Mon, May 15, 2017 at 10:44 AM Kyle Butt via llvm-commits
>>> >> <llvm-commits at lists.llvm.org> wrote:
>>> >>>
>>> >>> Author: iteratee
>>> >>> Date: Mon May 15 12:30:47 2017
>>> >>> New Revision: 303084
>>> >>>
>>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=303084&view=rev
>>> >>> Log:
>>> >>> CodeGen: BlockPlacement: Increase tail duplication size for O3.
>>> >>>
>>> >>> At O3 we are more willing to increase size if we believe it will
>>> improve
>>> >>> performance. The current threshold for tail-duplication of 2
>>> instructions
>>> >>> is
>>> >>> conservative, and can be relaxed at O3.
>>> >>>
>>> >>> Benchmark results:
>>> >>> llvm test-suite:
>>> >>> 6% improvement in aha, due to duplication of loop latch
>>> >>> 3% improvement in hexxagon
>>> >>>
>>> >>> 2% slowdown in lpbench. Seems related, but couldn't completely
>>> diagnose.
>>> >>>
>>> >>> Internal google benchmark:
>>> >>> Produces 4% improvement on internal google protocol buffer
>>> serialization
>>> >>> benchmarks.
>>> >>>
>>> >>> Differential-Revision: https://reviews.llvm.org/D32324
>>> >>>
>>> >>> Modified:
>>> >>> llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>>> >>> llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>>> >>> llvm/trunk/test/CodeGen/X86/sse1.ll
>>> >>>
>>> >>> Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>>> >>> URL:
>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/
>>> MachineBlockPlacement.cpp?rev=303084&r1=303083&r2=303084&view=diff
>>> >>>
>>> >>> ============================================================
>>> ==================
>>> >>> --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
>>> >>> +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Mon May 15
>>> 12:30:47
>>> >>> 2017
>>> >>> @@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacemen
>>> >>> "that won't conflict."), cl::init(2),
>>> >>> cl::Hidden);
>>> >>>
>>> >>> +// Heuristic for aggressive tail duplication.
>>> >>> +static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
>>> >>> + "tail-dup-placement-aggressive-threshold",
>>> >>> + cl::desc("Instruction cutoff for aggressive tail duplication
>>> during
>>> >>> "
>>> >>> + "layout. Used at -O3. Tail merging during layout is
>>> forced
>>> >>> to "
>>> >>> + "have a threshold that won't conflict."), cl::init(3),
>>> >>> + cl::Hidden);
>>> >>> +
>>> >>> // Heuristic for tail duplication.
>>> >>> static cl::opt<unsigned> TailDupPlacementPenalty(
>>> >>> "tail-dup-placement-penalty",
>>> >>> @@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachine
>>> >>> assert(BlockToChain.empty());
>>> >>> assert(ComputedEdges.empty());
>>> >>>
>>> >>> + unsigned TailDupSize = TailDupPlacementThreshold;
>>> >>> + // If only the aggressive threshold is explicitly set, use it.
>>> >>> + if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0
>>> &&
>>> >>> + TailDupPlacementThreshold.getNumOccurrences() == 0)
>>> >>> + TailDupSize = TailDupPlacementAggressiveThreshold;
>>> >>> +
>>> >>> + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>>> >>> + // For agressive optimization, we can adjust some thresholds to be
>>> >>> less
>>> >>> + // conservative.
>>> >>> + if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
>>> >>> + // At O3 we should be more willing to copy blocks for tail
>>> >>> duplication. This
>>> >>> + // increases size pressure, so we only do it at O3
>>> >>> + // Do this unless only the regular threshold is explicitly set.
>>> >>> + if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
>>> >>> + TailDupPlacementAggressiveThreshold.getNumOccurrences() !=
>>> 0)
>>> >>> + TailDupSize = TailDupPlacementAggressiveThreshold;
>>> >>> + }
>>> >>> +
>>> >>> if (TailDupPlacement) {
>>> >>> MPDT = &getAnalysis<MachinePostDominatorTree>();
>>> >>> - unsigned TailDupSize = TailDupPlacementThreshold;
>>> >>> if (MF.getFunction()->optForSize())
>>> >>> TailDupSize = 1;
>>> >>> TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
>>> >>> @@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachine
>>> >>> buildCFGChains();
>>> >>>
>>> >>> // Changing the layout can create new tail merging opportunities.
>>> >>> - TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>>> >>> // TailMerge can create jump into if branches that make CFG
>>> >>> irreducible for
>>> >>> // HW that requires structured CFG.
>>> >>> bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
>>> >>> @@ -2666,7 +2690,7 @@ bool MachineBlockPlacement::runOnMachine
>>> >>> BranchFoldPlacement;
>>> >>> // No tail merging opportunities if the block number is less than
>>> >>> four.
>>> >>> if (MF.size() > 3 && EnableTailMerge) {
>>> >>> - unsigned TailMergeSize = TailDupPlacementThreshold + 1;
>>> >>> + unsigned TailMergeSize = TailDupSize + 1;
>>> >>> BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false,
>>> >>> *MBFI,
>>> >>> *MBPI, TailMergeSize);
>>> >>>
>>> >>>
>>> >>> Modified: llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>>> >>> URL:
>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>>> PowerPC/tail-dup-layout.ll?rev=303084&r1=303083&r2=303084&view=diff
>>> >>>
>>> >>> ============================================================
>>> ==================
>>> >>> --- llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll (original)
>>> >>> +++ llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll Mon May 15
>>> >>> 12:30:47 2017
>>> >>> @@ -1,4 +1,5 @@
>>> >>> -; RUN: llc -O2 < %s | FileCheck %s
>>> >>> +; RUN: llc -O2 -o - %s | FileCheck --check-prefix=CHECK
>>> >>> --check-prefix=CHECK-O2 %s
>>> >>> +; RUN: llc -O3 -o - %s | FileCheck --check-prefix=CHECK
>>> >>> --check-prefix=CHECK-O3 %s
>>> >>> target datalayout = "e-m:e-i64:64-n32:64"
>>> >>> target triple = "powerpc64le-grtev4-linux-gnu"
>>> >>>
>>> >>> @@ -99,11 +100,9 @@ exit:
>>> >>> ; test1
>>> >>> ; test2
>>> >>> ; test3
>>> >>> -; test4
>>> >>> ; optional1
>>> >>> ; optional2
>>> >>> ; optional3
>>> >>> -; optional4
>>> >>> ; exit
>>> >>> ; even for 50/50 branches.
>>> >>> ; Tail duplication puts test n+1 at the end of optional n
>>> >>> @@ -157,6 +156,98 @@ test3:
>>> >>> br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>>> >>> optional3:
>>> >>> call void @c()
>>> >>> + br label %exit
>>> >>> +exit:
>>> >>> + ret void
>>> >>> +}
>>> >>> +
>>> >>> +; Intended layout:
>>> >>> +; The chain-of-triangles based duplicating produces the layout when
>>> 3
>>> >>> +; instructions are allowed for tail-duplication.
>>> >>> +; test1
>>> >>> +; test2
>>> >>> +; test3
>>> >>> +; optional1
>>> >>> +; optional2
>>> >>> +; optional3
>>> >>> +; exit
>>> >>> +;
>>> >>> +; Otherwise it produces the layout:
>>> >>> +; test1
>>> >>> +; optional1
>>> >>> +; test2
>>> >>> +; optional2
>>> >>> +; test3
>>> >>> +; optional3
>>> >>> +; exit
>>> >>> +
>>> >>> +;CHECK-LABEL: straight_test_3_instr_test:
>>> >>> +; test1 may have been merged with entry
>>> >>> +;CHECK: mr [[TAGREG:[0-9]+]], 3
>>> >>> +;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
>>> >>> +;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
>>> >>> +
>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O3-NEXT: # %test2
>>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
>>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>>> >>> +;CHECK-O3: blr
>>> >>> +;CHECK-O3-NEXT: .[[OPT1LABEL]]:
>>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>>> >>> +;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
>>> >>> +;CHECK-O3-NEXT: .[[OPT2LABEL]]:
>>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>>> >>> +;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
>>> >>> +;CHECK-O3-NEXT: .[[OPT3LABEL]]:
>>> >>> +;CHECK-O3: b .[[EXITLABEL]]
>>> >>> +
>>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O2-NEXT: # %optional1
>>> >>> +;CHECK-O2: .[[TEST2LABEL]]: # %test2
>>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
>>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O2-NEXT: # %optional2
>>> >>> +;CHECK-O2: .[[TEST3LABEL]]: # %test3
>>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
>>> >>> +;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
>>> >>> +;CHECK-O2-NEXT: # %optional3
>>> >>> +;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>>> >>> +;CHECK-O2: blr
>>> >>> +
>>> >>> +
>>> >>> +define void @straight_test_3_instr_test(i32 %tag) {
>>> >>> +entry:
>>> >>> + br label %test1
>>> >>> +test1:
>>> >>> + %tagbit1 = and i32 %tag, 3
>>> >>> + %tagbit1eq0 = icmp eq i32 %tagbit1, 2
>>> >>> + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
>>> >>> +optional1:
>>> >>> + call void @a()
>>> >>> + br label %test2
>>> >>> +test2:
>>> >>> + %tagbit2 = and i32 %tag, 12
>>> >>> + %tagbit2eq0 = icmp eq i32 %tagbit2, 8
>>> >>> + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
>>> >>> +optional2:
>>> >>> + call void @b()
>>> >>> + br label %test3
>>> >>> +test3:
>>> >>> + %tagbit3 = and i32 %tag, 48
>>> >>> + %tagbit3eq0 = icmp eq i32 %tagbit3, 32
>>> >>> + br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>>> >>> +optional3:
>>> >>> + call void @c()
>>> >>> br label %exit
>>> >>> exit:
>>> >>> ret void
>>> >>>
>>> >>> Modified: llvm/trunk/test/CodeGen/X86/sse1.ll
>>> >>> URL:
>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>>> X86/sse1.ll?rev=303084&r1=303083&r2=303084&view=diff
>>> >>>
>>> >>> ============================================================
>>> ==================
>>> >>> --- llvm/trunk/test/CodeGen/X86/sse1.ll (original)
>>> >>> +++ llvm/trunk/test/CodeGen/X86/sse1.ll Mon May 15 12:30:47 2017
>>> >>> @@ -66,7 +66,10 @@ define <4 x float> @vselect(<4 x float>*
>>> >>> ; X32-NEXT: jne .LBB1_8
>>> >>> ; X32-NEXT: .LBB1_7:
>>> >>> ; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>>> >>> -; X32-NEXT: jmp .LBB1_9
>>> >>> +; X32-NEXT: unpcklps {{.*#+}} xmm2 =
>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>> >>> +; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
>>> >>> +; X32-NEXT: je .LBB1_10
>>> >>> +; X32-NEXT: jmp .LBB1_11
>>> >>> ; X32-NEXT: .LBB1_1:
>>> >>> ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>>> >>> ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
>>> >>> @@ -77,11 +80,10 @@ define <4 x float> @vselect(<4 x float>*
>>> >>> ; X32-NEXT: je .LBB1_7
>>> >>> ; X32-NEXT: .LBB1_8: # %entry
>>> >>> ; X32-NEXT: xorps %xmm3, %xmm3
>>> >>> -; X32-NEXT: .LBB1_9: # %entry
>>> >>> ; X32-NEXT: unpcklps {{.*#+}} xmm2 =
>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>> >>> ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
>>> >>> ; X32-NEXT: jne .LBB1_11
>>> >>> -; X32-NEXT: # BB#10:
>>> >>> +; X32-NEXT: .LBB1_10:
>>> >>> ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>>> >>> ; X32-NEXT: .LBB1_11: # %entry
>>> >>> ; X32-NEXT: unpcklps {{.*#+}} xmm0 =
>>> xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>>> >>> @@ -103,7 +105,10 @@ define <4 x float> @vselect(<4 x float>*
>>> >>> ; X64-NEXT: jne .LBB1_8
>>> >>> ; X64-NEXT: .LBB1_7:
>>> >>> ; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>>> >>> -; X64-NEXT: jmp .LBB1_9
>>> >>> +; X64-NEXT: unpcklps {{.*#+}} xmm2 =
>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>> >>> +; X64-NEXT: testl %esi, %esi
>>> >>> +; X64-NEXT: je .LBB1_10
>>> >>> +; X64-NEXT: jmp .LBB1_11
>>> >>> ; X64-NEXT: .LBB1_1:
>>> >>> ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>>> >>> ; X64-NEXT: testl %edx, %edx
>>> >>> @@ -114,11 +119,10 @@ define <4 x float> @vselect(<4 x float>*
>>> >>> ; X64-NEXT: je .LBB1_7
>>> >>> ; X64-NEXT: .LBB1_8: # %entry
>>> >>> ; X64-NEXT: xorps %xmm3, %xmm3
>>> >>> -; X64-NEXT: .LBB1_9: # %entry
>>> >>> ; X64-NEXT: unpcklps {{.*#+}} xmm2 =
>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>> >>> ; X64-NEXT: testl %esi, %esi
>>> >>> ; X64-NEXT: jne .LBB1_11
>>> >>> -; X64-NEXT: # BB#10:
>>> >>> +; X64-NEXT: .LBB1_10:
>>> >>> ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>>> >>> ; X64-NEXT: .LBB1_11: # %entry
>>> >>> ; X64-NEXT: unpcklps {{.*#+}} xmm0 =
>>> xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>>> >>>
>>> >>>
>>> >>> _______________________________________________
>>> >>> llvm-commits mailing list
>>> >>> llvm-commits at lists.llvm.org
>>> >>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170522/22b0fadc/attachment-0001.html>
More information about the llvm-commits
mailing list