[llvm] r303084 - CodeGen: BlockPlacement: Increase tail duplication size for O3.

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Mon May 22 13:54:33 PDT 2017


This is also caused by the patch:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/1393/steps/check-llvm%20check-clang%20stage3%2Fmsan/logs/stdio

FAILED: tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o

/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang++
  -DGTEST_HAS_RTTI=0 -DLLVM_BUILD_GLOBAL_ISEL -D_DEBUG -D_GNU_SOURCE
-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
-Itools/llvm-xray
-I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray
-Iinclude -I/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/include
-fPIC -fvisibility-inlines-hidden -Werror=date-time -std=c++11 -Wall
-W -Wno-unused-parameter -Wwrite-strings -Wcast-qual
-Wmissing-field-initializers -pedantic -Wno-long-long
-Wcovered-switch-default -Wnon-virtual-dtor -Wdelete-non-virtual-dtor
-Wstring-conversion -fcolor-diagnostics -ffunction-sections
-fdata-sections -O3    -UNDEBUG  -fno-exceptions -fno-rtti -MD -MT
tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -MF
tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o.d -o
tools/llvm-xray/CMakeFiles/llvm-xray.dir/xray-color-helper.cc.o -c
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/llvm-xray/xray-color-helper.cc
==7293==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0 0x2f6e6c8 in
llvm::X86InstrInfo::AnalyzeBranchImpl(llvm::MachineBasicBlock&,
llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&,
llvm::SmallVectorImpl<llvm::MachineOperand>&,
llvm::SmallVectorImpl<llvm::MachineInstr*>&, bool) const
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6093:22
    #1 0x2f6e7ad in
llvm::X86InstrInfo::analyzeBranch(llvm::MachineBasicBlock&,
llvm::MachineBasicBlock*&, llvm::MachineBasicBlock*&,
llvm::SmallVectorImpl<llvm::MachineOperand>&, bool) const
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/Target/X86/X86InstrInfo.cpp:6133:10
    #2 0x3d390dc in llvm::TailDuplicator::tailDuplicate(bool,
llvm::MachineBasicBlock*, llvm::MachineBasicBlock*,
llvm::SmallVectorImpl<llvm::MachineBasicBlock*>&,
llvm::SmallVectorImpl<llvm::MachineInstr*>&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:837:10
   #3 0x3d35e4a in llvm::TailDuplicator::tailDuplicateAndUpdate(bool,
llvm::MachineBasicBlock*, llvm::MachineBasicBlock*,
llvm::SmallVectorImpl<llvm::MachineBasicBlock*>*,
llvm::function_ref<void (llvm::MachineBasicBlock*)>*)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/TailDuplicator.cpp:148:8
    #4 0x3f2451b in (anonymous
namespace)::MachineBlockPlacement::maybeTailDuplicateBlock(llvm::MachineBasicBlock*,
llvm::MachineBasicBlock*, (anonymous namespace)::BlockChain&,
llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*,
llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::MachineBasicBlock,
true, false, void>, false, false>&, bool&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2620:11
    #5 0x3f0a25d in repeatedlyTailDuplicateBlock
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2501:13
    #6 0x3f0a25d in (anonymous
namespace)::MachineBlockPlacement::buildChain(llvm::MachineBasicBlock
const*, (anonymous namespace)::BlockChain&,
llvm::SmallSetVector<llvm::MachineBasicBlock const*, 16u>*)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:1699
    #7 0x3ef1c70 in (anonymous
namespace)::MachineBlockPlacement::buildCFGChains()
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2278:3
    #8 0x3eec7a9 in (anonymous
namespace)::MachineBlockPlacement::runOnMachineFunction(llvm::MachineFunction&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineBlockPlacement.cpp:2696:3
    #9 0x39a8ffc in
llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/CodeGen/MachineFunctionPass.cpp:62:13
    #10 0x431de6e in
llvm::FPPassManager::runOnFunction(llvm::Function&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1519:27
    #11 0x431e534 in llvm::FPPassManager::runOnModule(llvm::Module&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1540:16
    #12 0x431f853 in runOnModule
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1596:27
    #13 0x431f853 in llvm::legacy::PassManagerImpl::run(llvm::Module&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/lib/IR/LegacyPassManager.cpp:1699
    #14 0x577960e in EmitAssembly
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:792:19
    #15 0x577960e in
clang::EmitBackendOutput(clang::DiagnosticsEngine&,
clang::HeaderSearchOptions const&, clang::CodeGenOptions const&,
clang::TargetOptions const&, clang::LangOptions const&,
llvm::DataLayout const&, llvm::Module*, clang::BackendAction,
std::__1::unique_ptr<llvm::raw_pwrite_stream,
std::__1::default_delete<llvm::raw_pwrite_stream> >)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp:1101
    #16 0x6f1b90e in
clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp:258:7
    #17 0x7ec96b6 in clang::ParseAST(clang::Sema&, bool, bool)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Parse/ParseAST.cpp:159:13
    #18 0x64339ba in clang::FrontendAction::Execute()
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/FrontendAction.cpp:837:8
    #19 0x637572c in
clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp:970:11
    #20 0x65fedd5 in
clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:249:25
    #21 0x977555 in cc1_main(llvm::ArrayRef<char const*>, char const*,
void*) /mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/cc1_main.cpp:221:13
    #22 0x971417 in ExecuteCC1Tool
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:299:12
    #23 0x971417 in main
/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm/tools/clang/tools/driver/driver.cpp:380
    #24 0x7fee6047d82f in __libc_start_main
(/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
    #25 0x8f1468 in _start
(/mnt/b/sanitizer-buildbot2/sanitizer-x86_64-linux-bootstrap/build/llvm_build_msan/bin/clang-5.0+0x8f1468)



On Mon, May 15, 2017 at 7:20 PM, Vitaly Buka <vitalybuka at google.com> wrote:

> Thanks, I've updated the test in r303136
>
> On Mon, May 15, 2017 at 7:14 PM Dmitry Vyukov <dvyukov at google.com> wrote:
>
>> I've benchmarked this change on the tsan benchmark:
>> projects/compiler-rt/lib/tsan/tests/rtl/TsanRtlTest
>> --gtest_also_run_disabled_tests --gtest_filter=DISABLED_BENCH.Mop*
>> (some numbers below)
>>
>> So let's just update check_analyze for the new reality.
>>
>>
>> $ egrep "OK.*Mop1Read" /tmp/old
>> [       OK ] DISABLED_BENCH.Mop1Read (5317 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5907 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5356 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5376 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5339 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5226 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5261 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5464 ms)
>> $ egrep "OK.*Mop1Read" /tmp/new
>> [       OK ] DISABLED_BENCH.Mop1Read (5324 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5238 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5344 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5327 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5373 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5208 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5332 ms)
>> [       OK ] DISABLED_BENCH.Mop1Read (5351 ms)
>>
>>
>> $ egrep "OK.*Mop8Write" /tmp/old
>> [       OK ] DISABLED_BENCH.Mop8Write (1624 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1623 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1575 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1619 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1585 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1586 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1627 ms)
>> $ egrep "OK.*Mop8Write" /tmp/new
>> [       OK ] DISABLED_BENCH.Mop8Write (1760 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1634 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1630 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1632 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1601 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1599 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1589 ms)
>> [       OK ] DISABLED_BENCH.Mop8Write (1625 ms)
>>
>>
>>
>> On Mon, May 15, 2017 at 5:10 PM, Vitaly Buka <vitalybuka at google.com>
>> wrote:
>> > +Dmitry Vyukov
>> >
>> > On Mon, May 15, 2017 at 5:09 PM Vitaly Buka <vitalybuka at google.com>
>> wrote:
>> >>
>> >> This test is broken after the patch:
>> >> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-autoconf/builds/8452/steps/tsan%20analyze/logs/stdio
>> >>
>> >> On Mon, May 15, 2017 at 10:44 AM Kyle Butt via llvm-commits
>> >> <llvm-commits at lists.llvm.org> wrote:
>> >>>
>> >>> Author: iteratee
>> >>> Date: Mon May 15 12:30:47 2017
>> >>> New Revision: 303084
>> >>>
>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=303084&view=rev
>> >>> Log:
>> >>> CodeGen: BlockPlacement: Increase tail duplication size for O3.
>> >>>
>> >>> At O3 we are more willing to increase size if we believe it will improve
>> >>> performance. The current threshold for tail-duplication of 2 instructions
>> >>> is conservative, and can be relaxed at O3.
>> >>>
>> >>> Benchmark results:
>> >>> llvm test-suite:
>> >>> 6% improvement in aha, due to duplication of loop latch
>> >>> 3% improvement in hexxagon
>> >>>
>> >>> 2% slowdown in lpbench. Seems related, but couldn't completely diagnose.
>> >>>
>> >>> Internal google benchmark:
>> >>> Produces 4% improvement on internal google protocol buffer serialization
>> >>> benchmarks.
>> >>>
>> >>> Differential-Revision: https://reviews.llvm.org/D32324
>> >>>
>> >>> Modified:
>> >>>     llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>> >>>     llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>> >>>     llvm/trunk/test/CodeGen/X86/sse1.ll
>> >>>
>> >>> Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>> >>> URL:
>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/
>> CodeGen/MachineBlockPlacement.cpp?rev=303084&r1=303083&r2=
>> 303084&view=diff
>> >>>
>> >>> ============================================================
>> ==================
>> >>> --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
>> >>> +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Mon May 15
>> 12:30:47
>> >>> 2017
>> >>> @@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacemen
>> >>>               "that won't conflict."), cl::init(2),
>> >>>      cl::Hidden);
>> >>>
>> >>> +// Heuristic for aggressive tail duplication.
>> >>> +static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
>> >>> +    "tail-dup-placement-aggressive-threshold",
>> >>> +    cl::desc("Instruction cutoff for aggressive tail duplication
>> during
>> >>> "
>> >>> +             "layout. Used at -O3. Tail merging during layout is
>> forced
>> >>> to "
>> >>> +             "have a threshold that won't conflict."), cl::init(3),
>> >>> +    cl::Hidden);
>> >>> +
>> >>>  // Heuristic for tail duplication.
>> >>>  static cl::opt<unsigned> TailDupPlacementPenalty(
>> >>>      "tail-dup-placement-penalty",
>> >>> @@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachine
>> >>>    assert(BlockToChain.empty());
>> >>>    assert(ComputedEdges.empty());
>> >>>
>> >>> +  unsigned TailDupSize = TailDupPlacementThreshold;
>> >>> +  // If only the aggressive threshold is explicitly set, use it.
>> >>> +  if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0
>> &&
>> >>> +      TailDupPlacementThreshold.getNumOccurrences() == 0)
>> >>> +    TailDupSize = TailDupPlacementAggressiveThreshold;
>> >>> +
>> >>> +  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>> >>> +  // For agressive optimization, we can adjust some thresholds to be
>> >>> less
>> >>> +  // conservative.
>> >>> +  if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
>> >>> +    // At O3 we should be more willing to copy blocks for tail
>> >>> duplication. This
>> >>> +    // increases size pressure, so we only do it at O3
>> >>> +    // Do this unless only the regular threshold is explicitly set.
>> >>> +    if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
>> >>> +        TailDupPlacementAggressiveThreshold.getNumOccurrences() !=
>> 0)
>> >>> +      TailDupSize = TailDupPlacementAggressiveThreshold;
>> >>> +  }
>> >>> +
>> >>>    if (TailDupPlacement) {
>> >>>      MPDT = &getAnalysis<MachinePostDominatorTree>();
>> >>> -    unsigned TailDupSize = TailDupPlacementThreshold;
>> >>>      if (MF.getFunction()->optForSize())
>> >>>        TailDupSize = 1;
>> >>>      TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
>> >>> @@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachine
>> >>>    buildCFGChains();
>> >>>
>> >>>    // Changing the layout can create new tail merging opportunities.
>> >>> -  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>> >>>    // TailMerge can create jump into if branches that make CFG
>> >>> irreducible for
>> >>>    // HW that requires structured CFG.
>> >>>    bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
>> >>> @@ -2666,7 +2690,7 @@ bool MachineBlockPlacement::runOnMachine
>> >>>                           BranchFoldPlacement;
>> >>>    // No tail merging opportunities if the block number is less than
>> >>> four.
>> >>>    if (MF.size() > 3 && EnableTailMerge) {
>> >>> -    unsigned TailMergeSize = TailDupPlacementThreshold + 1;
>> >>> +    unsigned TailMergeSize = TailDupSize + 1;
>> >>>      BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false,
>> >>> *MBFI,
>> >>>                      *MBPI, TailMergeSize);
>> >>>
>> >>>
>> >>> Modified: llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll
>> >>> URL:
>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>> CodeGen/PowerPC/tail-dup-layout.ll?rev=303084&r1=
>> 303083&r2=303084&view=diff
>> >>>
>> >>> ============================================================
>> ==================
>> >>> --- llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll (original)
>> >>> +++ llvm/trunk/test/CodeGen/PowerPC/tail-dup-layout.ll Mon May 15
>> >>> 12:30:47 2017
>> >>> @@ -1,4 +1,5 @@
>> >>> -; RUN: llc -O2 < %s | FileCheck %s
>> >>> +; RUN: llc -O2 -o - %s | FileCheck --check-prefix=CHECK
>> >>> --check-prefix=CHECK-O2 %s
>> >>> +; RUN: llc -O3 -o - %s | FileCheck --check-prefix=CHECK
>> >>> --check-prefix=CHECK-O3 %s
>> >>>  target datalayout = "e-m:e-i64:64-n32:64"
>> >>>  target triple = "powerpc64le-grtev4-linux-gnu"
>> >>>
>> >>> @@ -99,11 +100,9 @@ exit:
>> >>>  ; test1
>> >>>  ; test2
>> >>>  ; test3
>> >>> -; test4
>> >>>  ; optional1
>> >>>  ; optional2
>> >>>  ; optional3
>> >>> -; optional4
>> >>>  ; exit
>> >>>  ; even for 50/50 branches.
>> >>>  ; Tail duplication puts test n+1 at the end of optional n
>> >>> @@ -157,6 +156,98 @@ test3:
>> >>>    br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>> >>>  optional3:
>> >>>    call void @c()
>> >>> +  br label %exit
>> >>> +exit:
>> >>> +  ret void
>> >>> +}
>> >>> +
>> >>> +; Intended layout:
>> >>> +; The chain-of-triangles based duplicating produces the layout when 3
>> >>> +; instructions are allowed for tail-duplication.
>> >>> +; test1
>> >>> +; test2
>> >>> +; test3
>> >>> +; optional1
>> >>> +; optional2
>> >>> +; optional3
>> >>> +; exit
>> >>> +;
>> >>> +; Otherwise it produces the layout:
>> >>> +; test1
>> >>> +; optional1
>> >>> +; test2
>> >>> +; optional2
>> >>> +; test3
>> >>> +; optional3
>> >>> +; exit
>> >>> +
>> >>> +;CHECK-LABEL: straight_test_3_instr_test:
>> >>> +; test1 may have been merged with entry
>> >>> +;CHECK: mr [[TAGREG:[0-9]+]], 3
>> >>> +;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
>> >>> +;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
>> >>> +
>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O3-NEXT: # %test2
>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
>> >>> +;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>> >>> +;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>> >>> +;CHECK-O3: blr
>> >>> +;CHECK-O3-NEXT: .[[OPT1LABEL]]:
>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
>> >>> +;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
>> >>> +;CHECK-O3-NEXT: .[[OPT2LABEL]]:
>> >>> +;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>> >>> +;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
>> >>> +;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
>> >>> +;CHECK-O3-NEXT: .[[OPT3LABEL]]:
>> >>> +;CHECK-O3: b .[[EXITLABEL]]
>> >>> +
>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O2-NEXT: # %optional1
>> >>> +;CHECK-O2: .[[TEST2LABEL]]: # %test2
>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
>> >>> +;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O2-NEXT: # %optional2
>> >>> +;CHECK-O2: .[[TEST3LABEL]]: # %test3
>> >>> +;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
>> >>> +;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
>> >>> +;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
>> >>> +;CHECK-O2-NEXT: # %optional3
>> >>> +;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
>> >>> +;CHECK-O2: blr
>> >>> +
>> >>> +
>> >>> +define void @straight_test_3_instr_test(i32 %tag) {
>> >>> +entry:
>> >>> +  br label %test1
>> >>> +test1:
>> >>> +  %tagbit1 = and i32 %tag, 3
>> >>> +  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
>> >>> +  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
>> >>> +optional1:
>> >>> +  call void @a()
>> >>> +  br label %test2
>> >>> +test2:
>> >>> +  %tagbit2 = and i32 %tag, 12
>> >>> +  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
>> >>> +  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
>> >>> +optional2:
>> >>> +  call void @b()
>> >>> +  br label %test3
>> >>> +test3:
>> >>> +  %tagbit3 = and i32 %tag, 48
>> >>> +  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
>> >>> +  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
>> >>> +optional3:
>> >>> +  call void @c()
>> >>>    br label %exit
>> >>>  exit:
>> >>>    ret void
>> >>>
>> >>> Modified: llvm/trunk/test/CodeGen/X86/sse1.ll
>> >>> URL:
>> >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>> CodeGen/X86/sse1.ll?rev=303084&r1=303083&r2=303084&view=diff
>> >>>
>> >>> ============================================================
>> ==================
>> >>> --- llvm/trunk/test/CodeGen/X86/sse1.ll (original)
>> >>> +++ llvm/trunk/test/CodeGen/X86/sse1.ll Mon May 15 12:30:47 2017
>> >>> @@ -66,7 +66,10 @@ define <4 x float> @vselect(<4 x float>*
>> >>>  ; X32-NEXT:    jne .LBB1_8
>> >>>  ; X32-NEXT:  .LBB1_7:
>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>> >>> -; X32-NEXT:    jmp .LBB1_9
>> >>> +; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[
>> 1]
>> >>> +; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>> >>> +; X32-NEXT:    je .LBB1_10
>> >>> +; X32-NEXT:    jmp .LBB1_11
>> >>>  ; X32-NEXT:  .LBB1_1:
>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>> >>>  ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>> >>> @@ -77,11 +80,10 @@ define <4 x float> @vselect(<4 x float>*
>> >>>  ; X32-NEXT:    je .LBB1_7
>> >>>  ; X32-NEXT:  .LBB1_8: # %entry
>> >>>  ; X32-NEXT:    xorps %xmm3, %xmm3
>> >>> -; X32-NEXT:  .LBB1_9: # %entry
>> >>>  ; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[
>> 1]
>> >>>  ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
>> >>>  ; X32-NEXT:    jne .LBB1_11
>> >>> -; X32-NEXT:  # BB#10:
>> >>> +; X32-NEXT:  .LBB1_10:
>> >>>  ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>> >>>  ; X32-NEXT:  .LBB1_11: # %entry
>> >>>  ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[
>> 1]
>> >>> @@ -103,7 +105,10 @@ define <4 x float> @vselect(<4 x float>*
>> >>>  ; X64-NEXT:    jne .LBB1_8
>> >>>  ; X64-NEXT:  .LBB1_7:
>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
>> >>> -; X64-NEXT:    jmp .LBB1_9
>> >>> +; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[
>> 1]
>> >>> +; X64-NEXT:    testl %esi, %esi
>> >>> +; X64-NEXT:    je .LBB1_10
>> >>> +; X64-NEXT:    jmp .LBB1_11
>> >>>  ; X64-NEXT:  .LBB1_1:
>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
>> >>>  ; X64-NEXT:    testl %edx, %edx
>> >>> @@ -114,11 +119,10 @@ define <4 x float> @vselect(<4 x float>*
>> >>>  ; X64-NEXT:    je .LBB1_7
>> >>>  ; X64-NEXT:  .LBB1_8: # %entry
>> >>>  ; X64-NEXT:    xorps %xmm3, %xmm3
>> >>> -; X64-NEXT:  .LBB1_9: # %entry
>> >>>  ; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[
>> 1]
>> >>>  ; X64-NEXT:    testl %esi, %esi
>> >>>  ; X64-NEXT:    jne .LBB1_11
>> >>> -; X64-NEXT:  # BB#10:
>> >>> +; X64-NEXT:  .LBB1_10:
>> >>>  ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
>> >>>  ; X64-NEXT:  .LBB1_11: # %entry
>> >>>  ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[
>> 1]
>> >>>
>> >>>
>> >>> _______________________________________________
>> >>> llvm-commits mailing list
>> >>> llvm-commits at lists.llvm.org
>> >>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170522/f8abe3b8/attachment.html>


More information about the llvm-commits mailing list