[llvm] r224611 - merge consecutive stores of extracted vector elements

Alexey Samsonov vonosmas at gmail.com
Tue Dec 30 16:48:49 PST 2014


Sorry, I had to revert this commit in r225031.

This commit was sent for review in http://reviews.llvm.org/D6698, but was
later submitted without any comments from reviewers.
Maybe, that review thread is a good place to get feedback and discuss the
correct fix.

Repro instructions:
$ cat tmp/b.c
__extension__ typedef unsigned long long int uint64_t;

#define NLIMBS 9
typedef uint64_t felem[NLIMBS];
uint64_t mask;

void select_point(felem out[4]);

void batch_mul() {
  felem tmp[4];
  for (int i = 520; i >= 0; --i) {
    select_point(tmp);
    {
      uint64_t *out = tmp[3];
      const uint64_t *in = tmp[1];
      static const uint64_t two62m3 = -1U << 5;
      static const uint64_t two62m2 = -1U << 4;
      out[0] = two62m3 - in[0];
      out[1] = two62m2 - in[1];
      out[2] = two62m2 - in[2];
      out[3] = two62m2 - in[3];
      out[4] = two62m2 - in[4];
      out[5] = two62m2 - in[5];
      out[6] = two62m2 - in[6];
      out[7] = two62m2 - in[7];
      out[8] = two62m2 - in[8];
    }

    {
      uint64_t *out = tmp[1];
      const uint64_t *in = tmp[3];

      for (unsigned int i = 0; i < NLIMBS; ++i) {
        const uint64_t tmp = mask & (in[i] ^ out[i]);
        out[i] ^= tmp;
      }
    }
  }
}


Operand not processed?
0x5c4c770: v2i64,ch = load 0x5c4ac60, 0x5c4c660,
0x5c45850<LD16[%sunkaddr115](tbaa=<0x5b6c2a8>)> [ORD=61] [ID=1]

UNREACHABLE executed at lib/CodeGen/SelectionDAG/LegalizeTypes.cpp:438!
#0 0x14b8498 llvm::sys::PrintStackTrace(_IO_FILE*) (bin/clang-3.6+0x14b8498)
#1 0x14b9a2b SignalHandler(int) (bin/clang-3.6+0x14b9a2b)
#2 0x7f11fbfff340 __restore_rt
(/lib/x86_64-linux-gnu/libpthread.so.0+0x10340)
#3 0x7f11fb220bb9 gsignal
/build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0
#4 0x7f11fb223fc8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0
#5 0x1484373 llvm::llvm_unreachable_internal(char const*, char const*,
unsigned int) (bin/clang-3.6+0x1484373)
#6 0x1885dce llvm::DAGTypeLegalizer::run() (bin/clang-3.6+0x1885dce)
#7 0x188b294 llvm::SelectionDAG::LegalizeTypes() (bin/clang-3.6+0x188b294)
#8 0x184c6c7 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
(bin/clang-3.6+0x184c6c7)
#9 0x184b9b8 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function
const&) (bin/clang-3.6+0x184b9b8)
#10 0x1848634
llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
(bin/clang-3.6+0x1848634)
#11 0xc85a96 (anonymous
namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
(bin/clang-3.6+0xc85a96)
#12 0xf7f5bc llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
(bin/clang-3.6+0xf7f5bc)
#13 0x11b53f3 llvm::FPPassManager::runOnFunction(llvm::Function&)
(bin/clang-3.6+0x11b53f3)
#14 0x11b566b llvm::FPPassManager::runOnModule(llvm::Module&)
(bin/clang-3.6+0x11b566b)
#15 0x11b5bc7 llvm::legacy::PassManagerImpl::run(llvm::Module&)
(bin/clang-3.6+0x11b5bc7)
#16 0x1905f5d clang::EmitBackendOutput(clang::DiagnosticsEngine&,
clang::CodeGenOptions const&, clang::TargetOptions const&,
clang::LangOptions const&, llvm::StringRef, llvm::Module*,
clang::BackendAction, llvm::raw_ostream*) (bin/clang-3.6+0x1905f5d)
#17 0x18fab95
clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&)
(bin/clang-3.6+0x18fab95)
#18 0x1e8c793 clang::ParseAST(clang::Sema&, bool, bool)
(bin/clang-3.6+0x1e8c793)
#19 0x166104e clang::FrontendAction::Execute() (bin/clang-3.6+0x166104e)
#20 0x163207c
clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
(bin/clang-3.6+0x163207c)
#21 0x16e561a clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
(bin/clang-3.6+0x16e561a)
#22 0x6ca3d2 cc1_main(llvm::ArrayRef<char const*>, char const*, void*)
(bin/clang-3.6+0x6ca3d2)
#23 0x6c914a main (bin/clang-3.6+0x6c914a)
#24 0x7f11fb20bec5 __libc_start_main
/build/buildd/eglibc-2.19/csu/libc-start.c:321:0
#25 0x6c612b _start (bin/clang-3.6+0x6c612b)
Stack dump:
0. Program arguments: bin/clang -cc1 -triple x86_64-unknown-linux-gnu
-emit-obj -mnoexecstack -main-file-name b.c -mrelocation-model pic
-pic-level 2 -mthread-model posix -mdisable-fp-elim -target-cpu x86-64
-target-feature +sse3 -ffunction-sections -fdata-sections -O2 -ferror-limit
19 -fmessage-length 0 -mstackrealign -fno-signed-char -fobjc-runtime=gcc
-fdiagnostics-show-option -fcolor-diagnostics -vectorize-loops
-vectorize-slp -x c tmp/b.c
1. <eof> parser at end of file
2. Code generation
3. Running pass 'Function Pass Manager' on module 'tmp/b.c'.
4. Running pass 'X86 DAG->DAG Instruction Selection' on function
'@batch_mul'


On Fri, Dec 19, 2014 at 12:23 PM, Sanjay Patel <spatel at rotateright.com>
wrote:

> Author: spatel
> Date: Fri Dec 19 14:23:41 2014
> New Revision: 224611
>
> URL: http://llvm.org/viewvc/llvm-project?rev=224611&view=rev
> Log:
> merge consecutive stores of extracted vector elements
>
> Add a path to DAGCombiner::MergeConsecutiveStores()
> to combine multiple scalar stores when the store operands
> are extracted vector elements. This is a partial fix for
> PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
>
> For the new test case, codegen improves from:
>
>    vmovss  %xmm0, (%rdi)
>    vextractps      $1, %xmm0, 4(%rdi)
>    vextractps      $2, %xmm0, 8(%rdi)
>    vextractps      $3, %xmm0, 12(%rdi)
>    vextractf128    $1, %ymm0, %xmm0
>    vmovss  %xmm0, 16(%rdi)
>    vextractps      $1, %xmm0, 20(%rdi)
>    vextractps      $2, %xmm0, 24(%rdi)
>    vextractps      $3, %xmm0, 28(%rdi)
>    vzeroupper
>    retq
>
> To:
>
>    vmovups      %ymm0, (%rdi)
>    vzeroupper
>    retq
>
> Patch reviewed by Nadav Rotem.
>
> Differential Revision: http://reviews.llvm.org/D6698
>
>
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=224611&r1=224610&r2=224611&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 19
> 14:23:41 2014
> @@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores
>      return false;
>
>    // Perform an early exit check. Do not bother looking at stored values
> that
> -  // are not constants or loads.
> +  // are not constants, loads, or extracted vector elements.
>    SDValue StoredVal = St->getValue();
>    bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
> -  if (!isa<ConstantSDNode>(StoredVal) &&
> !isa<ConstantFPSDNode>(StoredVal) &&
> -      !IsLoadSrc)
> +  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
> +                       isa<ConstantFPSDNode>(StoredVal);
> +  bool IsExtractVecEltSrc = (StoredVal.getOpcode() ==
> ISD::EXTRACT_VECTOR_ELT);
> +
> +  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
>      return false;
>
>    // Only look at ends of store sequences.
> @@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores
>    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
>
>    // Store the constants into memory as one consecutive store.
> -  if (!IsLoadSrc) {
> +  if (IsConstantSrc) {
>      unsigned LastLegalType = 0;
>      unsigned LastLegalVectorType = 0;
>      bool NonZero = false;
> @@ -9769,6 +9772,74 @@ bool DAGCombiner::MergeConsecutiveStores
>        while (!St->use_empty())
>          DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
>        deleteAndRecombine(St);
> +    }
> +
> +    return true;
> +  }
> +
> +  // When extracting multiple vector elements, try to store them
> +  // in one vector store rather than a sequence of scalar stores.
> +  if (IsExtractVecEltSrc) {
> +    unsigned NumElem = 0;
> +    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
> +      // Find a legal type for the vector store.
> +      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
> +      if (TLI.isTypeLegal(Ty))
> +        NumElem = i + 1;
> +    }
> +
> +    // Make sure we have a legal type and something to merge.
> +    if (NumElem < 2)
> +      return false;
> +
> +    unsigned EarliestNodeUsed = 0;
> +    for (unsigned i=0; i < NumElem; ++i) {
> +      // Find a chain for the new wide-store operand. Notice that some
> +      // of the store nodes that we found may not be selected for
> inclusion
> +      // in the wide store. The chain we use needs to be the chain of the
> +      // earliest store node which is *used* and replaced by the wide
> store.
> +      if (StoreNodes[i].SequenceNum >
> StoreNodes[EarliestNodeUsed].SequenceNum)
> +        EarliestNodeUsed = i;
> +    }
> +
> +    // The earliest Node in the DAG.
> +    LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
> +    SDLoc DL(StoreNodes[0].MemNode);
> +
> +    SDValue StoredVal;
> +
> +    // Find a legal type for the vector store.
> +    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
> +
> +    SmallVector<SDValue, 8> Ops;
> +    for (unsigned i = 0; i < NumElem ; ++i) {
> +      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> +      SDValue Val = St->getValue();
> +      // All of the operands of a BUILD_VECTOR must have the same type.
> +      if (Val.getValueType() != MemVT)
> +        return false;
> +      Ops.push_back(Val);
> +    }
> +
> +    // Build the extracted vector elements back into a vector.
> +    StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
> +
> +    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
> +                                    FirstInChain->getBasePtr(),
> +                                    FirstInChain->getPointerInfo(),
> +                                    false, false,
> +                                    FirstInChain->getAlignment());
> +
> +    // Replace the first store with the new store
> +    CombineTo(EarliestOp, NewStore);
> +    // Erase all other stores.
> +    for (unsigned i = 0; i < NumElem ; ++i) {
> +      if (StoreNodes[i].MemNode == EarliestOp)
> +        continue;
> +      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> +      while (!St->use_empty())
> +        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
> +      deleteAndRecombine(St);
>      }
>
>      return true;
>
> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=224611&r1=224610&r2=224611&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Dec 19
> 14:23:41 2014
> @@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSex
>  ; <label>:14
>    ret void
>  }
> +
> +define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
> +  %vecext0 = extractelement <8 x float> %v, i32 0
> +  %vecext1 = extractelement <8 x float> %v, i32 1
> +  %vecext2 = extractelement <8 x float> %v, i32 2
> +  %vecext3 = extractelement <8 x float> %v, i32 3
> +  %vecext4 = extractelement <8 x float> %v, i32 4
> +  %vecext5 = extractelement <8 x float> %v, i32 5
> +  %vecext6 = extractelement <8 x float> %v, i32 6
> +  %vecext7 = extractelement <8 x float> %v, i32 7
> +  %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
> +  %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
> +  %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
> +  %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
> +  %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
> +  %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
> +  %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
> +  store float %vecext0, float* %ptr, align 4
> +  store float %vecext1, float* %arrayidx1, align 4
> +  store float %vecext2, float* %arrayidx2, align 4
> +  store float %vecext3, float* %arrayidx3, align 4
> +  store float %vecext4, float* %arrayidx4, align 4
> +  store float %vecext5, float* %arrayidx5, align 4
> +  store float %vecext6, float* %arrayidx6, align 4
> +  store float %vecext7, float* %arrayidx7, align 4
> +  ret void
> +
> +; CHECK-LABEL: merge_vec_element_store
> +; CHECK: vmovups
> +; CHECK-NEXT: vzeroupper
> +; CHECK-NEXT: retq
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>



-- 
Alexey Samsonov
vonosmas at gmail.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141230/5ebeb5ce/attachment.html>


More information about the llvm-commits mailing list