[llvm] r303084 - CodeGen: BlockPlacement: Increase tail duplication size for O3.

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Mon May 22 14:35:33 PDT 2017


The patch likely just exposed a preexisting issue.
Fixed with r303581.

On Mon, May 22, 2017 at 1:56 PM, Vitaly Buka <vitalybuka at google.com> wrote:

> It started in the build range that includes the patch:
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/1355
> And I can locally reproduce it.
>
> On Mon, May 22, 2017 at 1:54 PM, Vitaly Buka <vitalybuka at google.com>
> wrote:
>
>> This is also caused by the patch:
>> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-
>> bootstrap/builds/1393/steps/check-llvm%20check-clang%
>> 20stage3%2Fmsan/logs/stdio
>>
>> FAILED: tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o
>>
>> /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang++   -DGTEST_HAS_RTTI=0 -DLLVM_BUILD_GLOBAL_ISEL -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Itools/llvm-xray -I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray -Iinclude -I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/include -fPIC -fvisibility-inlines-hidden -Werror=date-time -std=c++11 -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wcovered-switch-default -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wstring-conversion -fcolor-diagnostics -ffunction-sections -fdata-sections -O3    -UNDEBUG  -fno-exceptions -fno-rtti -MD -MT tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -MF tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o.d -o tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -c /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray/xray-color-helper.cc
>> ==7293==WARNING: MemorySanitizer: use-of-uninitialized-value
>>     #0 0x2f6e6c8 in llvm::X86InstrInfo::AnalyzeBranchImpl(llvm::MachineBasicBlock&, llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&, llvm::SmallVectorImpl<llvm::MachineOperand>&, llvm::SmallVectorImpl<llvm::MachineInstr*>&, bool) const /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6093:22
>>     #1 0x2f6e7ad in llvm::X86InstrInfo::analyzeBranch(llvm::MachineBasicBlock&, llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&, llvm::SmallVectorImpl<llvm::MachineOperand>&, bool) const /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6133:10
>>     #2 0x3d390dc in llvm::TailDuplicator::tailDuplicate(bool, llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, llvm::SmallVectorImpl<llvm::MachineBasicBlock*>&, llvm::SmallVectorImpl<llvm::MachineInstr*>&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:837:10
>>     #3 0x3d35e4a in llvm::TailDuplicator::tailDuplicateAndUpdate(bool, llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, llvm::SmallVectorImpl<llvm::MachineBasicBlock*>*, llvm::function_ref<void (llvm::MachineBasicBlock*)>*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:148:8
>>     #4 0x3f2451b in (anonymous namespace)::MachineBlockPlacement::maybeTailDuplicateBlock(llvm::MachineBasicBlock*, llvm::MachineBasicBlock*, (anonymous namespace)::BlockChain&, llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*, llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::MachineBasicBlock, true, false, void>, false, false>&, bool&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2620:11
>>     #5 0x3f0a25d in repeatedlyTailDuplicateBlock /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2501:13
>>     #6 0x3f0a25d in (anonymous namespace)::MachineBlockPlacement::buildChain(llvm::MachineBasicBlock const*, (anonymous namespace)::BlockChain&, llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:1699
>>     #7 0x3ef1c70 in (anonymous namespace)::MachineBlockPlacement::buildCFGChains() /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2278:3
>>     #8 0x3eec7a9 in (anonymous namespace)::MachineBlockPlacement::runOnMachineFunction(llvm::MachineFunction&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2696:3
>>     #9 0x39a8ffc in llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineFunctionPass.cpp:62:13
>>     #10 0x431de6e in llvm::FPPassManager::runOnFunction(llvm::Function&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1519:27
>>     #11 0x431e534 in llvm::FPPassManager::runOnModule(llvm::Module&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1540:16
>>     #12 0x431f853 in runOnModule /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1596:27
>>     #13 0x431f853 in llvm::legacy::PassManagerImpl::run(llvm::Module&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1699
>>     #14 0x577960e in EmitAssembly /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:792:19
>>     #15 0x577960e in clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout const&, llvm::Module*, clang::BackendAction, std::__1::unique_ptr<llvm::raw_pwrite_stream, std::__1::default_delete<llvm::raw_pwrite_stream> >) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:1101
>>     #16 0x6f1b90e in clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp:258:7
>>     #17 0x7ec96b6 in clang::ParseAST(clang::Sema&, bool, bool) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Parse/ParseAST.cpp:159:13
>>     #18 0x64339ba in clang::FrontendAction::Execute() /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/FrontendAction.cpp:837:8
>>     #19 0x637572c in clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp:970:11
>>     #20 0x65fedd5 in clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:249:25
>>     #21 0x977555 in cc1_main(llvm::ArrayRef<char const*>, char const*, void*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/cc1_main.cpp:221:13
>>     #22 0x971417 in ExecuteCC1Tool /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:299:12
>>     #23 0x971417 in main /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:380
>>     #24 0x7fee6047d82f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
>>     #25 0x8f1468 in _start (/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang-5.0+0x8f1468)
>>
>>
>>
>> On Mon, May 15, 2017 at 7:20 PM, Vitaly Buka <vitalybuka at google.com>
>> wrote:
>>
>>> Thanks, I've updated the test in r303136
>>>
>>> On Mon, May 15, 2017 at 7:14 PM Dmitry Vyukov <dvyukov at google.com>
>>> wrote:
>>>
>>>> I've benchmarked this change on the tsan benchmark:
>>>> projects/compiler-rt/lib/tsan/tests/rtl/TsanRtlTest
>>>> --gtest_also_run_disabled_tests --gtest_filter=DISABLED_BENCH.Mop*
>>>> (some numbers below)
>>>>
>>>> So let's just update check_analyze for the new reality.
>>>>
>>>>
>>>> $ egrep "OK.*Mop1Read" /tmp/old
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5317 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5907 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5356 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5376 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5339 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5226 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5261 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5464 ms)
>>>> $ egrep "OK.*Mop1Read" /tmp/new
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5324 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5238 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5344 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5327 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5373 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5208 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5332 ms)
>>>> [       OK ] DISABLED_BENCH.Mop1Read (5351 ms)
>>>>
>>>>
>>>> $ egrep "OK.*Mop8Write" /tmp/old
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1624 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1623 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1575 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1619 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1585 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1586 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1627 ms)
>>>> $ egrep "OK.*Mop8Write" /tmp/new
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1760 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1634 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1630 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1632 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1601 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1599 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1589 ms)
>>>> [       OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>>>>
>>>>
>>>>
>>>> On Mon, May 15, 2017 at 5:10 PM, Vitaly Buka <vitalybuka at google.com>
>>>> wrote:
>>>> > +Dmitry Vyukov
>>>> >
>>>> > On Mon, May 15, 2017 at 5:09 PM Vitaly Buka <vitalybuka at google.com>
>>>> wrote:
>>>> >>
>>>> >> This test is broken after the patch:
>>>> >> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-aut
>>>> oconf/builds/8452/steps/tsan%20analyze/logs/stdio
>>>> >>
>>>> >> On Mon, May 15, 2017 at 10:44 AM Kyle Butt via llvm-commits
>>>> >> <llvm-commits at lists.llvm.org> wrote:
>>>> >>>
>>>> >>> Author: iteratee
>>>> >>> Date: Mon May 15 12:30:47 2017
>>>> >>> New Revision: 303084
>>>> >>>
>>>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=303084&view=rev
>>>> >>> Log:
>>>> >>> CodeGen: BlockPlacement: Increase tail duplication size for O3.
>>>> >>>
>>>> >>> At O3 we are more willing to increase size if we believe it will
>>>> improve
>>>> >>> performance. The current threshold for tail-duplication of 2
>>>> instructions
>>>> >>> is
>>>> >>> conservative, and can be relaxed at O3.
>>>> >>>
>>>> >>> Benchmark results:
>>>> >>> llvm test-suite:
>>>> >>> 6% improvement in aha, due to duplication of loop latch
>>>> >>> 3% improvement in hexxagon
>>>> >>>
>>>> >>> 2% slowdown in lpbench. Seems related, but couldn't completely
>>>> diagnose.
>>>> >>>
>>>> >>> Internal google benchmark:
>>>> >>> Produces 4% improvement on internal google protocol buffer
>>>> serialization
>>>> >>> benchmarks.
>>>> >>>
>>>> >>> Differential-Revision: https://reviews.llvm.org/D32324
>>>> >>>
>>>> >>> Modified:
>>>> >>>     llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>>>> >>>     llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>>>> >>>     llvm/trunk/test/CodeGen/X86/sse1.ll
>>>> >>>
>>>> >>> Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>>>> >>> URL:
>>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/M
>>>> achineBlockPlacement.cpp?rev=303084&r1=303083&r2=303084&view=diff
>>>> >>>
>>>> >>> ============================================================
>>>> ==================
>>>> >>> --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
>>>> >>> +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Mon May 15
>>>> 12:30:47
>>>> >>> 2017
>>>> >>> @@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacemen
>>>> >>>               "that won't conflict."), cl::init(2),
>>>> >>>      cl::Hidden);
>>>> >>>
>>>> >>> +// Heuristic for aggressive tail duplication.
>>>> >>> +static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
>>>> >>> +    "tail-dup-placement-aggressive-threshold",
>>>> >>> +    cl::desc("Instruction cutoff for aggressive tail duplication
>>>> during
>>>> >>> "
>>>> >>> +             "layout. Used at -O3. Tail merging during layout is
>>>> forced
>>>> >>> to "
>>>> >>> +             "have a threshold that won't conflict."), cl::init(3),
>>>> >>> +    cl::Hidden);
>>>> >>> +
>>>> >>>  // Heuristic for tail duplication.
>>>> >>>  static cl::opt<unsigned> TailDupPlacementPenalty(
>>>> >>>      "tail-dup-placement-penalty",
>>>> >>> @@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachine
>>>> >>>    assert(BlockToChain.empty());
>>>> >>>    assert(ComputedEdges.empty());
>>>> >>>
>>>> >>> +  unsigned TailDupSize = TailDupPlacementThreshold;
>>>> >>> +  // If only the aggressive threshold is explicitly set, use it.
>>>> >>> +  if (TailDupPlacementAggressiveThreshold.getNumOccurrences() !=
>>>> 0 &&
>>>> >>> +      TailDupPlacementThreshold.getNumOccurrences() == 0)
>>>> >>> +    TailDupSize = TailDupPlacementAggressiveThreshold;
>>>> >>> +
>>>> >>> +  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>>>> >>> +  // For agressive optimization, we can adjust some thresholds to
>>>> be
>>>> >>> less
>>>> >>> +  // conservative.
>>>> >>> +  if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
>>>> >>> +    // At O3 we should be more willing to copy blocks for tail
>>>> >>> duplication. This
>>>> >>> +    // increases size pressure, so we only do it at O3
>>>> >>> +    // Do this unless only the regular threshold is explicitly set.
>>>> >>> +    if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
>>>> >>> +        TailDupPlacementAggressiveThreshold.getNumOccurrences()
>>>> != 0)
>>>> >>> +      TailDupSize = TailDupPlacementAggressiveThreshold;
>>>> >>> +  }
>>>> >>> +
>>>> >>>    if (TailDupPlacement) {
>>>> >>>      MPDT = &getAnalysis<MachinePostDominatorTree>();
>>>> >>> -    unsigned TailDupSize = TailDupPlacementThreshold;
>>>> >>>      if (MF.getFunction()->optForSize())
>>>> >>>        TailDupSize = 1;
>>>> >>>      TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
>>>> >>> @@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachine
>>>> >>>    buildCFGChains();
>>>> >>>
>>>> >>>    // Changing the layout can create new tail merging opportunities.
>>>> >>> -  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>>>> >>>    // TailMerge can create jump into if branches that make CFG
>>>> >>> irreducible for
>>>> >>>    // HW that requires structured CFG.
>>>> >>>    bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG()
>>>> &&
>>>> >>> @@ -2666,7 +2690,7 @@ bool MachineBlockPlacement::runOnMachine
>>>> >>>                           BranchFoldPlacement;
>>>> >>>    // No tail merging opportunities if the block number is less than
>>>> >>> four.
>>>> >>>    if (MF.size() > 3 && EnableTailMerge) {
>>>> >>> -    unsigned TailMergeSize = TailDupPlacementThreshold + 1;
>>>> >>> +    unsigned TailMergeSize = TailDupSize + 1;
>>>> >>>      BranchFolder BF(/*EnableTailMerge=*/true,
>>>> /*CommonHoist=*/false,
>>>> >>> *MBFI,
>>>> >>>                      *MBPI, TailMergeSize);
>>>> >>>
>>>> >>>
>>>> >>> Modified: llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>>>> >>> URL:
>>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>>>> PowerPC/tail-dup-layout.ll?rev=303084&r1=303083&r2=303084&view=diff
>>>> >>>
>>>> >>> ============================================================
>>>> ==================
>>>> >>> --- llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll (original)
>>>> >>> +++ llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll Mon May 15
>>>> >>> 12:30:47 2017
>>>> >>> @@ -1,4 +1,5 @@
>>>> >>> -; RUN: llc -O2 < %s | FileCheck %s
>>>> >>> +; RUN: llc -O2 -o - %s | FileCheck --check-prefix=CHECK
>>>> >>> --check-prefix=CHECK-O2 %s
>>>> >>> +; RUN: llc -O3 -o - %s | FileCheck --check-prefix=CHECK
>>>> >>> --check-prefix=CHECK-O3 %s
>>>> >>>  target datalayout = "e-m:e-i64:64-n32:64"
>>>> >>>  target triple = "powerpc64le-grtev4-linux-gnu"
>>>> >>>
>>>> >>> @@ -99,11 +100,9 @@ exit:
>>>> >>>  ; test1
>>>> >>>  ; test2
>>>> >>>  ; test3
>>>> >>> -; test4
>>>> >>>  ; optional1
>>>> >>>  ; optional2
>>>> >>>  ; optional3
>>>> >>> -; optional4
>>>> >>>  ; exit
>>>> >>>  ; even for 50/50 branches.
>>>> >>>  ; Tail duplication puts test n+1 at the end of optional n
>>>> >>> @@ -157,6 +156,98 @@ test3:
>>>> >>>    br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>>>> >>>  optional3:
>>>> >>>    call void @c()
>>>> >>> +  br label %exit
>>>> >>> +exit:
>>>> >>> +  ret void
>>>> >>> +}
>>>> >>> +
>>>> >>> +; Intended layout:
>>>> >>> +; The chain-of-triangles based duplicating produces the layout
>>>> when 3
>>>> >>> +; instructions are allowed for tail-duplication.
>>>> >>> +; test1
>>>> >>> +; test2
>>>> >>> +; test3
>>>> >>> +; optional1
>>>> >>> +; optional2
>>>> >>> +; optional3
>>>> >>> +; exit
>>>> >>> +;
>>>> >>> +; Otherwise it produces the layout:
>>>> >>> +; test1
>>>> >>> +; optional1
>>>> >>> +; test2
>>>> >>> +; optional2
>>>> >>> +; test3
>>>> >>> +; optional3
>>>> >>> +; exit
>>>> >>> +
>>>> >>> +;CHECK-LABEL: straight_test_3_instr_test:
>>>> >>> +; test1 may have been merged with entry
>>>> >>> +;CHECK: mr [[TAGREG:[0-9]+]], 3
>>>> >>> +;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
>>>> >>> +;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
>>>> >>> +
>>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O3-NEXT: # %test2
>>>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
>>>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>>>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>>>> >>> +;CHECK-O3: blr
>>>> >>> +;CHECK-O3-NEXT: .[[OPT1LABEL]]:
>>>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>>>> >>> +;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
>>>> >>> +;CHECK-O3-NEXT: .[[OPT2LABEL]]:
>>>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>>>> >>> +;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
>>>> >>> +;CHECK-O3-NEXT: .[[OPT3LABEL]]:
>>>> >>> +;CHECK-O3: b .[[EXITLABEL]]
>>>> >>> +
>>>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O2-NEXT: # %optional1
>>>> >>> +;CHECK-O2: .[[TEST2LABEL]]: # %test2
>>>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>>>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
>>>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O2-NEXT: # %optional2
>>>> >>> +;CHECK-O2: .[[TEST3LABEL]]: # %test3
>>>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>>>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
>>>> >>> +;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
>>>> >>> +;CHECK-O2-NEXT: # %optional3
>>>> >>> +;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>>>> >>> +;CHECK-O2: blr
>>>> >>> +
>>>> >>> +
>>>> >>> +define void @straight_test_3_instr_test(i32 %tag) {
>>>> >>> +entry:
>>>> >>> +  br label %test1
>>>> >>> +test1:
>>>> >>> +  %tagbit1 = and i32 %tag, 3
>>>> >>> +  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
>>>> >>> +  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
>>>> >>> +optional1:
>>>> >>> +  call void @a()
>>>> >>> +  br label %test2
>>>> >>> +test2:
>>>> >>> +  %tagbit2 = and i32 %tag, 12
>>>> >>> +  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
>>>> >>> +  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
>>>> >>> +optional2:
>>>> >>> +  call void @b()
>>>> >>> +  br label %test3
>>>> >>> +test3:
>>>> >>> +  %tagbit3 = and i32 %tag, 48
>>>> >>> +  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
>>>> >>> +  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>>>> >>> +optional3:
>>>> >>> +  call void @c()
>>>> >>>    br label %exit
>>>> >>>  exit:
>>>> >>>    ret void
>>>> >>>
>>>> >>> Modified: llvm/trunk/test/CodeGen/X86/sse1.ll
>>>> >>> URL:
>>>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>>>> X86/sse1.ll?rev=303084&r1=303083&r2=303084&view=diff
>>>> >>>
>>>> >>> ============================================================
>>>> ==================
>>>> >>> --- llvm/trunk/test/CodeGen/X86/sse1.ll (original)
>>>> >>> +++ llvm/trunk/test/CodeGen/X86/sse1.ll Mon May 15 12:30:47 2017
>>>> >>> @@ -66,7 +66,10 @@ define <4 x float> @vselect(<4 x float>*
>>>> >>>  ; X32-NEXT:    jne .LBB1_8
>>>> >>>  ; X32-NEXT:  .LBB1_7:
>>>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>>>> >>> -; X32-NEXT:    jmp .LBB1_9
>>>> >>> +; X32-NEXT:    unpcklps {{.*#+}} xmm2 =
>>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>>> >>> +; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>>>> >>> +; X32-NEXT:    je .LBB1_10
>>>> >>> +; X32-NEXT:    jmp .LBB1_11
>>>> >>>  ; X32-NEXT:  .LBB1_1:
>>>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>>>> >>>  ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>>>> >>> @@ -77,11 +80,10 @@ define <4 x float> @vselect(<4 x float>*
>>>> >>>  ; X32-NEXT:    je .LBB1_7
>>>> >>>  ; X32-NEXT:  .LBB1_8: # %entry
>>>> >>>  ; X32-NEXT:    xorps %xmm3, %xmm3
>>>> >>> -; X32-NEXT:  .LBB1_9: # %entry
>>>> >>>  ; X32-NEXT:    unpcklps {{.*#+}} xmm2 =
>>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>>> >>>  ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>>>> >>>  ; X32-NEXT:    jne .LBB1_11
>>>> >>> -; X32-NEXT:  # BB#10:
>>>> >>> +; X32-NEXT:  .LBB1_10:
>>>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>>>> >>>  ; X32-NEXT:  .LBB1_11: # %entry
>>>> >>>  ; X32-NEXT:    unpcklps {{.*#+}} xmm0 =
>>>> xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>>>> >>> @@ -103,7 +105,10 @@ define <4 x float> @vselect(<4 x float>*
>>>> >>>  ; X64-NEXT:    jne .LBB1_8
>>>> >>>  ; X64-NEXT:  .LBB1_7:
>>>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>>>> >>> -; X64-NEXT:    jmp .LBB1_9
>>>> >>> +; X64-NEXT:    unpcklps {{.*#+}} xmm2 =
>>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>>> >>> +; X64-NEXT:    testl %esi, %esi
>>>> >>> +; X64-NEXT:    je .LBB1_10
>>>> >>> +; X64-NEXT:    jmp .LBB1_11
>>>> >>>  ; X64-NEXT:  .LBB1_1:
>>>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>>>> >>>  ; X64-NEXT:    testl %edx, %edx
>>>> >>> @@ -114,11 +119,10 @@ define <4 x float> @vselect(<4 x float>*
>>>> >>>  ; X64-NEXT:    je .LBB1_7
>>>> >>>  ; X64-NEXT:  .LBB1_8: # %entry
>>>> >>>  ; X64-NEXT:    xorps %xmm3, %xmm3
>>>> >>> -; X64-NEXT:  .LBB1_9: # %entry
>>>> >>>  ; X64-NEXT:    unpcklps {{.*#+}} xmm2 =
>>>> xmm2[0],xmm3[0],xmm2[1],xmm3[1]
>>>> >>>  ; X64-NEXT:    testl %esi, %esi
>>>> >>>  ; X64-NEXT:    jne .LBB1_11
>>>> >>> -; X64-NEXT:  # BB#10:
>>>> >>> +; X64-NEXT:  .LBB1_10:
>>>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>>>> >>>  ; X64-NEXT:  .LBB1_11: # %entry
>>>> >>>  ; X64-NEXT:    unpcklps {{.*#+}} xmm0 =
>>>> xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>>>> >>>
>>>> >>>
>>>> >>> _______________________________________________
>>>> >>> llvm-commits mailing list
>>>> >>> llvm-commits at lists.llvm.org
>>>> >>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170522/8ba229ef/attachment.html>


More information about the llvm-commits mailing list