[llvm] r224611 - merge consecutive stores of extracted vector elements

Sanjay Patel spatel at rotateright.com
Wed Dec 31 08:10:10 PST 2014


Thanks, Alexey. Nadav reviewed the patch outside of Phab, so I just made a
note of that in the commit message. I'll look into the crash soon.

On Tue, Dec 30, 2014 at 5:48 PM, Alexey Samsonov <vonosmas at gmail.com> wrote:

> Sorry, I had to revert this commit in r225031.
>
> This commit was sent for review in http://reviews.llvm.org/D6698, but was
> later submitted without any comments from reviewers.
> Maybe, that review thread is a good place to get feedback and discuss the
> correct fix.
>
> Repro instructions:
> $ cat tmp/b.c
> __extension__ typedef unsigned long long int uint64_t;
>
> #define NLIMBS 9
> typedef uint64_t felem[NLIMBS];
> uint64_t mask;
>
> void select_point(felem out[4]);
>
> void batch_mul() {
>   felem tmp[4];
>   for (int i = 520; i >= 0; --i) {
>     select_point(tmp);
>     {
>       uint64_t *out = tmp[3];
>       const uint64_t *in = tmp[1];
>       static const uint64_t two62m3 = -1U << 5;
>       static const uint64_t two62m2 = -1U << 4;
>       out[0] = two62m3 - in[0];
>       out[1] = two62m2 - in[1];
>       out[2] = two62m2 - in[2];
>       out[3] = two62m2 - in[3];
>       out[4] = two62m2 - in[4];
>       out[5] = two62m2 - in[5];
>       out[6] = two62m2 - in[6];
>       out[7] = two62m2 - in[7];
>       out[8] = two62m2 - in[8];
>     }
>
>     {
>       uint64_t *out = tmp[1];
>       const uint64_t *in = tmp[3];
>
>       for (unsigned int i = 0; i < NLIMBS; ++i) {
>         const uint64_t tmp = mask & (in[i] ^ out[i]);
>         out[i] ^= tmp;
>       }
>     }
>   }
> }
>
>
> Operand not processed?
> 0x5c4c770: v2i64,ch = load 0x5c4ac60, 0x5c4c660,
> 0x5c45850<LD16[%sunkaddr115](tbaa=<0x5b6c2a8>)> [ORD=61] [ID=1]
>
> UNREACHABLE executed at lib/CodeGen/SelectionDAG/LegalizeTypes.cpp:438!
> #0 0x14b8498 llvm::sys::PrintStackTrace(_IO_FILE*)
> (bin/clang-3.6+0x14b8498)
> #1 0x14b9a2b SignalHandler(int) (bin/clang-3.6+0x14b9a2b)
> #2 0x7f11fbfff340 __restore_rt
> (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340)
> #3 0x7f11fb220bb9 gsignal
> /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0
> #4 0x7f11fb223fc8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0
> #5 0x1484373 llvm::llvm_unreachable_internal(char const*, char const*,
> unsigned int) (bin/clang-3.6+0x1484373)
> #6 0x1885dce llvm::DAGTypeLegalizer::run() (bin/clang-3.6+0x1885dce)
> #7 0x188b294 llvm::SelectionDAG::LegalizeTypes() (bin/clang-3.6+0x188b294)
> #8 0x184c6c7 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
> (bin/clang-3.6+0x184c6c7)
> #9 0x184b9b8 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function
> const&) (bin/clang-3.6+0x184b9b8)
> #10 0x1848634
> llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> (bin/clang-3.6+0x1848634)
> #11 0xc85a96 (anonymous
> namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> (bin/clang-3.6+0xc85a96)
> #12 0xf7f5bc llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
> (bin/clang-3.6+0xf7f5bc)
> #13 0x11b53f3 llvm::FPPassManager::runOnFunction(llvm::Function&)
> (bin/clang-3.6+0x11b53f3)
> #14 0x11b566b llvm::FPPassManager::runOnModule(llvm::Module&)
> (bin/clang-3.6+0x11b566b)
> #15 0x11b5bc7 llvm::legacy::PassManagerImpl::run(llvm::Module&)
> (bin/clang-3.6+0x11b5bc7)
> #16 0x1905f5d clang::EmitBackendOutput(clang::DiagnosticsEngine&,
> clang::CodeGenOptions const&, clang::TargetOptions const&,
> clang::LangOptions const&, llvm::StringRef, llvm::Module*,
> clang::BackendAction, llvm::raw_ostream*) (bin/clang-3.6+0x1905f5d)
> #17 0x18fab95
> clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&)
> (bin/clang-3.6+0x18fab95)
> #18 0x1e8c793 clang::ParseAST(clang::Sema&, bool, bool)
> (bin/clang-3.6+0x1e8c793)
> #19 0x166104e clang::FrontendAction::Execute() (bin/clang-3.6+0x166104e)
> #20 0x163207c
> clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
> (bin/clang-3.6+0x163207c)
> #21 0x16e561a clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
> (bin/clang-3.6+0x16e561a)
> #22 0x6ca3d2 cc1_main(llvm::ArrayRef<char const*>, char const*, void*)
> (bin/clang-3.6+0x6ca3d2)
> #23 0x6c914a main (bin/clang-3.6+0x6c914a)
> #24 0x7f11fb20bec5 __libc_start_main
> /build/buildd/eglibc-2.19/csu/libc-start.c:321:0
> #25 0x6c612b _start (bin/clang-3.6+0x6c612b)
> Stack dump:
> 0. Program arguments: bin/clang -cc1 -triple x86_64-unknown-linux-gnu
> -emit-obj -mnoexecstack -main-file-name b.c -mrelocation-model pic
> -pic-level 2 -mthread-model posix -mdisable-fp-elim -target-cpu x86-64
> -target-feature +sse3 -ffunction-sections -fdata-sections -O2 -ferror-limit
> 19 -fmessage-length 0 -mstackrealign -fno-signed-char -fobjc-runtime=gcc
> -fdiagnostics-show-option -fcolor-diagnostics -vectorize-loops
> -vectorize-slp -x c tmp/b.c
> 1. <eof> parser at end of file
> 2. Code generation
> 3. Running pass 'Function Pass Manager' on module 'tmp/b.c'.
> 4. Running pass 'X86 DAG->DAG Instruction Selection' on function
> '@batch_mul'
>
>
> On Fri, Dec 19, 2014 at 12:23 PM, Sanjay Patel <spatel at rotateright.com>
> wrote:
>
>> Author: spatel
>> Date: Fri Dec 19 14:23:41 2014
>> New Revision: 224611
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=224611&view=rev
>> Log:
>> merge consecutive stores of extracted vector elements
>>
>> Add a path to DAGCombiner::MergeConsecutiveStores()
>> to combine multiple scalar stores when the store operands
>> are extracted vector elements. This is a partial fix for
>> PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
>>
>> For the new test case, codegen improves from:
>>
>>    vmovss  %xmm0, (%rdi)
>>    vextractps      $1, %xmm0, 4(%rdi)
>>    vextractps      $2, %xmm0, 8(%rdi)
>>    vextractps      $3, %xmm0, 12(%rdi)
>>    vextractf128    $1, %ymm0, %xmm0
>>    vmovss  %xmm0, 16(%rdi)
>>    vextractps      $1, %xmm0, 20(%rdi)
>>    vextractps      $2, %xmm0, 24(%rdi)
>>    vextractps      $3, %xmm0, 28(%rdi)
>>    vzeroupper
>>    retq
>>
>> To:
>>
>>    vmovups      %ymm0, (%rdi)
>>    vzeroupper
>>    retq
>>
>> Patch reviewed by Nadav Rotem.
>>
>> Differential Revision: http://reviews.llvm.org/D6698
>>
>>
>> Modified:
>>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>     llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=224611&r1=224610&r2=224611&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 19
>> 14:23:41 2014
>> @@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores
>>      return false;
>>
>>    // Perform an early exit check. Do not bother looking at stored values
>> that
>> -  // are not constants or loads.
>> +  // are not constants, loads, or extracted vector elements.
>>    SDValue StoredVal = St->getValue();
>>    bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
>> -  if (!isa<ConstantSDNode>(StoredVal) &&
>> !isa<ConstantFPSDNode>(StoredVal) &&
>> -      !IsLoadSrc)
>> +  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
>> +                       isa<ConstantFPSDNode>(StoredVal);
>> +  bool IsExtractVecEltSrc = (StoredVal.getOpcode() ==
>> ISD::EXTRACT_VECTOR_ELT);
>> +
>> +  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
>>      return false;
>>
>>    // Only look at ends of store sequences.
>> @@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores
>>    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
>>
>>    // Store the constants into memory as one consecutive store.
>> -  if (!IsLoadSrc) {
>> +  if (IsConstantSrc) {
>>      unsigned LastLegalType = 0;
>>      unsigned LastLegalVectorType = 0;
>>      bool NonZero = false;
>> @@ -9769,6 +9772,74 @@ bool DAGCombiner::MergeConsecutiveStores
>>        while (!St->use_empty())
>>          DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
>>        deleteAndRecombine(St);
>> +    }
>> +
>> +    return true;
>> +  }
>> +
>> +  // When extracting multiple vector elements, try to store them
>> +  // in one vector store rather than a sequence of scalar stores.
>> +  if (IsExtractVecEltSrc) {
>> +    unsigned NumElem = 0;
>> +    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
>> +      // Find a legal type for the vector store.
>> +      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
>> +      if (TLI.isTypeLegal(Ty))
>> +        NumElem = i + 1;
>> +    }
>> +
>> +    // Make sure we have a legal type and something to merge.
>> +    if (NumElem < 2)
>> +      return false;
>> +
>> +    unsigned EarliestNodeUsed = 0;
>> +    for (unsigned i=0; i < NumElem; ++i) {
>> +      // Find a chain for the new wide-store operand. Notice that some
>> +      // of the store nodes that we found may not be selected for
>> inclusion
>> +      // in the wide store. The chain we use needs to be the chain of the
>> +      // earliest store node which is *used* and replaced by the wide
>> store.
>> +      if (StoreNodes[i].SequenceNum >
>> StoreNodes[EarliestNodeUsed].SequenceNum)
>> +        EarliestNodeUsed = i;
>> +    }
>> +
>> +    // The earliest Node in the DAG.
>> +    LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
>> +    SDLoc DL(StoreNodes[0].MemNode);
>> +
>> +    SDValue StoredVal;
>> +
>> +    // Find a legal type for the vector store.
>> +    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
>> +
>> +    SmallVector<SDValue, 8> Ops;
>> +    for (unsigned i = 0; i < NumElem ; ++i) {
>> +      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
>> +      SDValue Val = St->getValue();
>> +      // All of the operands of a BUILD_VECTOR must have the same type.
>> +      if (Val.getValueType() != MemVT)
>> +        return false;
>> +      Ops.push_back(Val);
>> +    }
>> +
>> +    // Build the extracted vector elements back into a vector.
>> +    StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
>> +
>> +    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL,
>> StoredVal,
>> +                                    FirstInChain->getBasePtr(),
>> +                                    FirstInChain->getPointerInfo(),
>> +                                    false, false,
>> +                                    FirstInChain->getAlignment());
>> +
>> +    // Replace the first store with the new store
>> +    CombineTo(EarliestOp, NewStore);
>> +    // Erase all other stores.
>> +    for (unsigned i = 0; i < NumElem ; ++i) {
>> +      if (StoreNodes[i].MemNode == EarliestOp)
>> +        continue;
>> +      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
>> +      while (!St->use_empty())
>> +        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
>> +      deleteAndRecombine(St);
>>      }
>>
>>      return true;
>>
>> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=224611&r1=224610&r2=224611&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Dec 19
>> 14:23:41 2014
>> @@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSex
>>  ; <label>:14
>>    ret void
>>  }
>> +
>> +define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
>> +  %vecext0 = extractelement <8 x float> %v, i32 0
>> +  %vecext1 = extractelement <8 x float> %v, i32 1
>> +  %vecext2 = extractelement <8 x float> %v, i32 2
>> +  %vecext3 = extractelement <8 x float> %v, i32 3
>> +  %vecext4 = extractelement <8 x float> %v, i32 4
>> +  %vecext5 = extractelement <8 x float> %v, i32 5
>> +  %vecext6 = extractelement <8 x float> %v, i32 6
>> +  %vecext7 = extractelement <8 x float> %v, i32 7
>> +  %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
>> +  %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
>> +  %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
>> +  %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
>> +  %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
>> +  %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
>> +  %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
>> +  store float %vecext0, float* %ptr, align 4
>> +  store float %vecext1, float* %arrayidx1, align 4
>> +  store float %vecext2, float* %arrayidx2, align 4
>> +  store float %vecext3, float* %arrayidx3, align 4
>> +  store float %vecext4, float* %arrayidx4, align 4
>> +  store float %vecext5, float* %arrayidx5, align 4
>> +  store float %vecext6, float* %arrayidx6, align 4
>> +  store float %vecext7, float* %arrayidx7, align 4
>> +  ret void
>> +
>> +; CHECK-LABEL: merge_vec_element_store
>> +; CHECK: vmovups
>> +; CHECK-NEXT: vzeroupper
>> +; CHECK-NEXT: retq
>> +}
>> +
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
>
> --
> Alexey Samsonov
> vonosmas at gmail.com
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141231/10344761/attachment.html>


More information about the llvm-commits mailing list