[llvm] r224611 - merge consecutive stores of extracted vector elements
Sanjay Patel
spatel at rotateright.com
Wed Dec 31 08:10:10 PST 2014
Thanks, Alexey. Nadav reviewed the patch outside of Phab, so I just made a
note of that in the commit message. I'll look into the crash soon.
On Tue, Dec 30, 2014 at 5:48 PM, Alexey Samsonov <vonosmas at gmail.com> wrote:
> Sorry, I had to revert this commit in r225031.
>
> This commit was sent for review in http://reviews.llvm.org/D6698, but was
> later submitted without any comments from reviewers.
> Maybe, that review thread is a good place to get feedback and discuss the
> correct fix.
>
> Repro instructions:
> $ cat tmp/b.c
> __extension__ typedef unsigned long long int uint64_t;
>
> #define NLIMBS 9
> typedef uint64_t felem[NLIMBS];
> uint64_t mask;
>
> void select_point(felem out[4]);
>
> void batch_mul() {
> felem tmp[4];
> for (int i = 520; i >= 0; --i) {
> select_point(tmp);
> {
> uint64_t *out = tmp[3];
> const uint64_t *in = tmp[1];
> static const uint64_t two62m3 = -1U << 5;
> static const uint64_t two62m2 = -1U << 4;
> out[0] = two62m3 - in[0];
> out[1] = two62m2 - in[1];
> out[2] = two62m2 - in[2];
> out[3] = two62m2 - in[3];
> out[4] = two62m2 - in[4];
> out[5] = two62m2 - in[5];
> out[6] = two62m2 - in[6];
> out[7] = two62m2 - in[7];
> out[8] = two62m2 - in[8];
> }
>
> {
> uint64_t *out = tmp[1];
> const uint64_t *in = tmp[3];
>
> for (unsigned int i = 0; i < NLIMBS; ++i) {
> const uint64_t tmp = mask & (in[i] ^ out[i]);
> out[i] ^= tmp;
> }
> }
> }
> }
>
>
> Operand not processed?
> 0x5c4c770: v2i64,ch = load 0x5c4ac60, 0x5c4c660,
> 0x5c45850<LD16[%sunkaddr115](tbaa=<0x5b6c2a8>)> [ORD=61] [ID=1]
>
> UNREACHABLE executed at lib/CodeGen/SelectionDAG/LegalizeTypes.cpp:438!
> #0 0x14b8498 llvm::sys::PrintStackTrace(_IO_FILE*)
> (bin/clang-3.6+0x14b8498)
> #1 0x14b9a2b SignalHandler(int) (bin/clang-3.6+0x14b9a2b)
> #2 0x7f11fbfff340 __restore_rt
> (/lib/x86_64-linux-gnu/libpthread.so.0+0x10340)
> #3 0x7f11fb220bb9 gsignal
> /build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0
> #4 0x7f11fb223fc8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0
> #5 0x1484373 llvm::llvm_unreachable_internal(char const*, char const*,
> unsigned int) (bin/clang-3.6+0x1484373)
> #6 0x1885dce llvm::DAGTypeLegalizer::run() (bin/clang-3.6+0x1885dce)
> #7 0x188b294 llvm::SelectionDAG::LegalizeTypes() (bin/clang-3.6+0x188b294)
> #8 0x184c6c7 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
> (bin/clang-3.6+0x184c6c7)
> #9 0x184b9b8 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function
> const&) (bin/clang-3.6+0x184b9b8)
> #10 0x1848634
> llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> (bin/clang-3.6+0x1848634)
> #11 0xc85a96 (anonymous
> namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> (bin/clang-3.6+0xc85a96)
> #12 0xf7f5bc llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
> (bin/clang-3.6+0xf7f5bc)
> #13 0x11b53f3 llvm::FPPassManager::runOnFunction(llvm::Function&)
> (bin/clang-3.6+0x11b53f3)
> #14 0x11b566b llvm::FPPassManager::runOnModule(llvm::Module&)
> (bin/clang-3.6+0x11b566b)
> #15 0x11b5bc7 llvm::legacy::PassManagerImpl::run(llvm::Module&)
> (bin/clang-3.6+0x11b5bc7)
> #16 0x1905f5d clang::EmitBackendOutput(clang::DiagnosticsEngine&,
> clang::CodeGenOptions const&, clang::TargetOptions const&,
> clang::LangOptions const&, llvm::StringRef, llvm::Module*,
> clang::BackendAction, llvm::raw_ostream*) (bin/clang-3.6+0x1905f5d)
> #17 0x18fab95
> clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&)
> (bin/clang-3.6+0x18fab95)
> #18 0x1e8c793 clang::ParseAST(clang::Sema&, bool, bool)
> (bin/clang-3.6+0x1e8c793)
> #19 0x166104e clang::FrontendAction::Execute() (bin/clang-3.6+0x166104e)
> #20 0x163207c
> clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
> (bin/clang-3.6+0x163207c)
> #21 0x16e561a clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
> (bin/clang-3.6+0x16e561a)
> #22 0x6ca3d2 cc1_main(llvm::ArrayRef<char const*>, char const*, void*)
> (bin/clang-3.6+0x6ca3d2)
> #23 0x6c914a main (bin/clang-3.6+0x6c914a)
> #24 0x7f11fb20bec5 __libc_start_main
> /build/buildd/eglibc-2.19/csu/libc-start.c:321:0
> #25 0x6c612b _start (bin/clang-3.6+0x6c612b)
> Stack dump:
> 0. Program arguments: bin/clang -cc1 -triple x86_64-unknown-linux-gnu
> -emit-obj -mnoexecstack -main-file-name b.c -mrelocation-model pic
> -pic-level 2 -mthread-model posix -mdisable-fp-elim -target-cpu x86-64
> -target-feature +sse3 -ffunction-sections -fdata-sections -O2 -ferror-limit
> 19 -fmessage-length 0 -mstackrealign -fno-signed-char -fobjc-runtime=gcc
> -fdiagnostics-show-option -fcolor-diagnostics -vectorize-loops
> -vectorize-slp -x c tmp/b.c
> 1. <eof> parser at end of file
> 2. Code generation
> 3. Running pass 'Function Pass Manager' on module 'tmp/b.c'.
> 4. Running pass 'X86 DAG->DAG Instruction Selection' on function
> '@batch_mul'
>
>
> On Fri, Dec 19, 2014 at 12:23 PM, Sanjay Patel <spatel at rotateright.com>
> wrote:
>
>> Author: spatel
>> Date: Fri Dec 19 14:23:41 2014
>> New Revision: 224611
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=224611&view=rev
>> Log:
>> merge consecutive stores of extracted vector elements
>>
>> Add a path to DAGCombiner::MergeConsecutiveStores()
>> to combine multiple scalar stores when the store operands
>> are extracted vector elements. This is a partial fix for
>> PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
>>
>> For the new test case, codegen improves from:
>>
>> vmovss %xmm0, (%rdi)
>> vextractps $1, %xmm0, 4(%rdi)
>> vextractps $2, %xmm0, 8(%rdi)
>> vextractps $3, %xmm0, 12(%rdi)
>> vextractf128 $1, %ymm0, %xmm0
>> vmovss %xmm0, 16(%rdi)
>> vextractps $1, %xmm0, 20(%rdi)
>> vextractps $2, %xmm0, 24(%rdi)
>> vextractps $3, %xmm0, 28(%rdi)
>> vzeroupper
>> retq
>>
>> To:
>>
>> vmovups %ymm0, (%rdi)
>> vzeroupper
>> retq
>>
>> Patch reviewed by Nadav Rotem.
>>
>> Differential Revision: http://reviews.llvm.org/D6698
>>
>>
>> Modified:
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=224611&r1=224610&r2=224611&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 19
>> 14:23:41 2014
>> @@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores
>> return false;
>>
>> // Perform an early exit check. Do not bother looking at stored values
>> that
>> - // are not constants or loads.
>> + // are not constants, loads, or extracted vector elements.
>> SDValue StoredVal = St->getValue();
>> bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
>> - if (!isa<ConstantSDNode>(StoredVal) &&
>> !isa<ConstantFPSDNode>(StoredVal) &&
>> - !IsLoadSrc)
>> + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
>> + isa<ConstantFPSDNode>(StoredVal);
>> + bool IsExtractVecEltSrc = (StoredVal.getOpcode() ==
>> ISD::EXTRACT_VECTOR_ELT);
>> +
>> + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
>> return false;
>>
>> // Only look at ends of store sequences.
>> @@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores
>> LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
>>
>> // Store the constants into memory as one consecutive store.
>> - if (!IsLoadSrc) {
>> + if (IsConstantSrc) {
>> unsigned LastLegalType = 0;
>> unsigned LastLegalVectorType = 0;
>> bool NonZero = false;
>> @@ -9769,6 +9772,74 @@ bool DAGCombiner::MergeConsecutiveStores
>> while (!St->use_empty())
>> DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
>> deleteAndRecombine(St);
>> + }
>> +
>> + return true;
>> + }
>> +
>> + // When extracting multiple vector elements, try to store them
>> + // in one vector store rather than a sequence of scalar stores.
>> + if (IsExtractVecEltSrc) {
>> + unsigned NumElem = 0;
>> + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
>> + // Find a legal type for the vector store.
>> + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
>> + if (TLI.isTypeLegal(Ty))
>> + NumElem = i + 1;
>> + }
>> +
>> + // Make sure we have a legal type and something to merge.
>> + if (NumElem < 2)
>> + return false;
>> +
>> + unsigned EarliestNodeUsed = 0;
>> + for (unsigned i=0; i < NumElem; ++i) {
>> + // Find a chain for the new wide-store operand. Notice that some
>> + // of the store nodes that we found may not be selected for
>> inclusion
>> + // in the wide store. The chain we use needs to be the chain of the
>> + // earliest store node which is *used* and replaced by the wide
>> store.
>> + if (StoreNodes[i].SequenceNum >
>> StoreNodes[EarliestNodeUsed].SequenceNum)
>> + EarliestNodeUsed = i;
>> + }
>> +
>> + // The earliest Node in the DAG.
>> + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
>> + SDLoc DL(StoreNodes[0].MemNode);
>> +
>> + SDValue StoredVal;
>> +
>> + // Find a legal type for the vector store.
>> + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
>> +
>> + SmallVector<SDValue, 8> Ops;
>> + for (unsigned i = 0; i < NumElem ; ++i) {
>> + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
>> + SDValue Val = St->getValue();
>> + // All of the operands of a BUILD_VECTOR must have the same type.
>> + if (Val.getValueType() != MemVT)
>> + return false;
>> + Ops.push_back(Val);
>> + }
>> +
>> + // Build the extracted vector elements back into a vector.
>> + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
>> +
>> + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL,
>> StoredVal,
>> + FirstInChain->getBasePtr(),
>> + FirstInChain->getPointerInfo(),
>> + false, false,
>> + FirstInChain->getAlignment());
>> +
>> + // Replace the first store with the new store
>> + CombineTo(EarliestOp, NewStore);
>> + // Erase all other stores.
>> + for (unsigned i = 0; i < NumElem ; ++i) {
>> + if (StoreNodes[i].MemNode == EarliestOp)
>> + continue;
>> + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
>> + while (!St->use_empty())
>> + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
>> + deleteAndRecombine(St);
>> }
>>
>> return true;
>>
>> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=224611&r1=224610&r2=224611&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Dec 19
>> 14:23:41 2014
>> @@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSex
>> ; <label>:14
>> ret void
>> }
>> +
>> +define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
>> + %vecext0 = extractelement <8 x float> %v, i32 0
>> + %vecext1 = extractelement <8 x float> %v, i32 1
>> + %vecext2 = extractelement <8 x float> %v, i32 2
>> + %vecext3 = extractelement <8 x float> %v, i32 3
>> + %vecext4 = extractelement <8 x float> %v, i32 4
>> + %vecext5 = extractelement <8 x float> %v, i32 5
>> + %vecext6 = extractelement <8 x float> %v, i32 6
>> + %vecext7 = extractelement <8 x float> %v, i32 7
>> + %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
>> + %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
>> + %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
>> + %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
>> + %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
>> + %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
>> + %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
>> + store float %vecext0, float* %ptr, align 4
>> + store float %vecext1, float* %arrayidx1, align 4
>> + store float %vecext2, float* %arrayidx2, align 4
>> + store float %vecext3, float* %arrayidx3, align 4
>> + store float %vecext4, float* %arrayidx4, align 4
>> + store float %vecext5, float* %arrayidx5, align 4
>> + store float %vecext6, float* %arrayidx6, align 4
>> + store float %vecext7, float* %arrayidx7, align 4
>> + ret void
>> +
>> +; CHECK-LABEL: merge_vec_element_store
>> +; CHECK: vmovups
>> +; CHECK-NEXT: vzeroupper
>> +; CHECK-NEXT: retq
>> +}
>> +
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
>
> --
> Alexey Samsonov
> vonosmas at gmail.com
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141231/10344761/attachment.html>
More information about the llvm-commits
mailing list