[llvm] r224611 - merge consecutive stores of extracted vector elements
Alexey Samsonov
vonosmas at gmail.com
Tue Dec 30 16:48:49 PST 2014
Sorry, I had to revert this commit in r225031.
This commit was sent for review in http://reviews.llvm.org/D6698, but was
later submitted without any comments from reviewers.
Maybe, that review thread is a good place to get feedback and discuss the
correct fix.
Repro instructions:
$ cat tmp/b.c
__extension__ typedef unsigned long long int uint64_t;
#define NLIMBS 9
typedef uint64_t felem[NLIMBS];
uint64_t mask;
void select_point(felem out[4]);
void batch_mul() {
felem tmp[4];
for (int i = 520; i >= 0; --i) {
select_point(tmp);
{
uint64_t *out = tmp[3];
const uint64_t *in = tmp[1];
static const uint64_t two62m3 = -1U << 5;
static const uint64_t two62m2 = -1U << 4;
out[0] = two62m3 - in[0];
out[1] = two62m2 - in[1];
out[2] = two62m2 - in[2];
out[3] = two62m2 - in[3];
out[4] = two62m2 - in[4];
out[5] = two62m2 - in[5];
out[6] = two62m2 - in[6];
out[7] = two62m2 - in[7];
out[8] = two62m2 - in[8];
}
{
uint64_t *out = tmp[1];
const uint64_t *in = tmp[3];
for (unsigned int i = 0; i < NLIMBS; ++i) {
const uint64_t tmp = mask & (in[i] ^ out[i]);
out[i] ^= tmp;
}
}
}
}
Operand not processed?
0x5c4c770: v2i64,ch = load 0x5c4ac60, 0x5c4c660,
0x5c45850<LD16[%sunkaddr115](tbaa=<0x5b6c2a8>)> [ORD=61] [ID=1]
UNREACHABLE executed at lib/CodeGen/SelectionDAG/LegalizeTypes.cpp:438!
#0 0x14b8498 llvm::sys::PrintStackTrace(_IO_FILE*) (bin/clang-3.6+0x14b8498)
#1 0x14b9a2b SignalHandler(int) (bin/clang-3.6+0x14b9a2b)
#2 0x7f11fbfff340 __restore_rt
(/lib/x86_64-linux-gnu/libpthread.so.0+0x10340)
#3 0x7f11fb220bb9 gsignal
/build/buildd/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0
#4 0x7f11fb223fc8 abort /build/buildd/eglibc-2.19/stdlib/abort.c:91:0
#5 0x1484373 llvm::llvm_unreachable_internal(char const*, char const*,
unsigned int) (bin/clang-3.6+0x1484373)
#6 0x1885dce llvm::DAGTypeLegalizer::run() (bin/clang-3.6+0x1885dce)
#7 0x188b294 llvm::SelectionDAG::LegalizeTypes() (bin/clang-3.6+0x188b294)
#8 0x184c6c7 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
(bin/clang-3.6+0x184c6c7)
#9 0x184b9b8 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function
const&) (bin/clang-3.6+0x184b9b8)
#10 0x1848634
llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
(bin/clang-3.6+0x1848634)
#11 0xc85a96 (anonymous
namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
(bin/clang-3.6+0xc85a96)
#12 0xf7f5bc llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
(bin/clang-3.6+0xf7f5bc)
#13 0x11b53f3 llvm::FPPassManager::runOnFunction(llvm::Function&)
(bin/clang-3.6+0x11b53f3)
#14 0x11b566b llvm::FPPassManager::runOnModule(llvm::Module&)
(bin/clang-3.6+0x11b566b)
#15 0x11b5bc7 llvm::legacy::PassManagerImpl::run(llvm::Module&)
(bin/clang-3.6+0x11b5bc7)
#16 0x1905f5d clang::EmitBackendOutput(clang::DiagnosticsEngine&,
clang::CodeGenOptions const&, clang::TargetOptions const&,
clang::LangOptions const&, llvm::StringRef, llvm::Module*,
clang::BackendAction, llvm::raw_ostream*) (bin/clang-3.6+0x1905f5d)
#17 0x18fab95
clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&)
(bin/clang-3.6+0x18fab95)
#18 0x1e8c793 clang::ParseAST(clang::Sema&, bool, bool)
(bin/clang-3.6+0x1e8c793)
#19 0x166104e clang::FrontendAction::Execute() (bin/clang-3.6+0x166104e)
#20 0x163207c
clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
(bin/clang-3.6+0x163207c)
#21 0x16e561a clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
(bin/clang-3.6+0x16e561a)
#22 0x6ca3d2 cc1_main(llvm::ArrayRef<char const*>, char const*, void*)
(bin/clang-3.6+0x6ca3d2)
#23 0x6c914a main (bin/clang-3.6+0x6c914a)
#24 0x7f11fb20bec5 __libc_start_main
/build/buildd/eglibc-2.19/csu/libc-start.c:321:0
#25 0x6c612b _start (bin/clang-3.6+0x6c612b)
Stack dump:
0. Program arguments: bin/clang -cc1 -triple x86_64-unknown-linux-gnu
-emit-obj -mnoexecstack -main-file-name b.c -mrelocation-model pic
-pic-level 2 -mthread-model posix -mdisable-fp-elim -target-cpu x86-64
-target-feature +sse3 -ffunction-sections -fdata-sections -O2 -ferror-limit
19 -fmessage-length 0 -mstackrealign -fno-signed-char -fobjc-runtime=gcc
-fdiagnostics-show-option -fcolor-diagnostics -vectorize-loops
-vectorize-slp -x c tmp/b.c
1. <eof> parser at end of file
2. Code generation
3. Running pass 'Function Pass Manager' on module 'tmp/b.c'.
4. Running pass 'X86 DAG->DAG Instruction Selection' on function
'@batch_mul'
On Fri, Dec 19, 2014 at 12:23 PM, Sanjay Patel <spatel at rotateright.com>
wrote:
> Author: spatel
> Date: Fri Dec 19 14:23:41 2014
> New Revision: 224611
>
> URL: http://llvm.org/viewvc/llvm-project?rev=224611&view=rev
> Log:
> merge consecutive stores of extracted vector elements
>
> Add a path to DAGCombiner::MergeConsecutiveStores()
> to combine multiple scalar stores when the store operands
> are extracted vector elements. This is a partial fix for
> PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
>
> For the new test case, codegen improves from:
>
> vmovss %xmm0, (%rdi)
> vextractps $1, %xmm0, 4(%rdi)
> vextractps $2, %xmm0, 8(%rdi)
> vextractps $3, %xmm0, 12(%rdi)
> vextractf128 $1, %ymm0, %xmm0
> vmovss %xmm0, 16(%rdi)
> vextractps $1, %xmm0, 20(%rdi)
> vextractps $2, %xmm0, 24(%rdi)
> vextractps $3, %xmm0, 28(%rdi)
> vzeroupper
> retq
>
> To:
>
> vmovups %ymm0, (%rdi)
> vzeroupper
> retq
>
> Patch reviewed by Nadav Rotem.
>
> Differential Revision: http://reviews.llvm.org/D6698
>
>
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=224611&r1=224610&r2=224611&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 19
> 14:23:41 2014
> @@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores
> return false;
>
> // Perform an early exit check. Do not bother looking at stored values
> that
> - // are not constants or loads.
> + // are not constants, loads, or extracted vector elements.
> SDValue StoredVal = St->getValue();
> bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
> - if (!isa<ConstantSDNode>(StoredVal) &&
> !isa<ConstantFPSDNode>(StoredVal) &&
> - !IsLoadSrc)
> + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
> + isa<ConstantFPSDNode>(StoredVal);
> + bool IsExtractVecEltSrc = (StoredVal.getOpcode() ==
> ISD::EXTRACT_VECTOR_ELT);
> +
> + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
> return false;
>
> // Only look at ends of store sequences.
> @@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores
> LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
>
> // Store the constants into memory as one consecutive store.
> - if (!IsLoadSrc) {
> + if (IsConstantSrc) {
> unsigned LastLegalType = 0;
> unsigned LastLegalVectorType = 0;
> bool NonZero = false;
> @@ -9769,6 +9772,74 @@ bool DAGCombiner::MergeConsecutiveStores
> while (!St->use_empty())
> DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
> deleteAndRecombine(St);
> + }
> +
> + return true;
> + }
> +
> + // When extracting multiple vector elements, try to store them
> + // in one vector store rather than a sequence of scalar stores.
> + if (IsExtractVecEltSrc) {
> + unsigned NumElem = 0;
> + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
> + // Find a legal type for the vector store.
> + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
> + if (TLI.isTypeLegal(Ty))
> + NumElem = i + 1;
> + }
> +
> + // Make sure we have a legal type and something to merge.
> + if (NumElem < 2)
> + return false;
> +
> + unsigned EarliestNodeUsed = 0;
> + for (unsigned i=0; i < NumElem; ++i) {
> + // Find a chain for the new wide-store operand. Notice that some
> + // of the store nodes that we found may not be selected for
> inclusion
> + // in the wide store. The chain we use needs to be the chain of the
> + // earliest store node which is *used* and replaced by the wide
> store.
> + if (StoreNodes[i].SequenceNum >
> StoreNodes[EarliestNodeUsed].SequenceNum)
> + EarliestNodeUsed = i;
> + }
> +
> + // The earliest Node in the DAG.
> + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
> + SDLoc DL(StoreNodes[0].MemNode);
> +
> + SDValue StoredVal;
> +
> + // Find a legal type for the vector store.
> + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
> +
> + SmallVector<SDValue, 8> Ops;
> + for (unsigned i = 0; i < NumElem ; ++i) {
> + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> + SDValue Val = St->getValue();
> + // All of the operands of a BUILD_VECTOR must have the same type.
> + if (Val.getValueType() != MemVT)
> + return false;
> + Ops.push_back(Val);
> + }
> +
> + // Build the extracted vector elements back into a vector.
> + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
> +
> + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
> + FirstInChain->getBasePtr(),
> + FirstInChain->getPointerInfo(),
> + false, false,
> + FirstInChain->getAlignment());
> +
> + // Replace the first store with the new store
> + CombineTo(EarliestOp, NewStore);
> + // Erase all other stores.
> + for (unsigned i = 0; i < NumElem ; ++i) {
> + if (StoreNodes[i].MemNode == EarliestOp)
> + continue;
> + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> + while (!St->use_empty())
> + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
> + deleteAndRecombine(St);
> }
>
> return true;
>
> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=224611&r1=224610&r2=224611&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Dec 19
> 14:23:41 2014
> @@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSex
> ; <label>:14
> ret void
> }
> +
> +define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
> + %vecext0 = extractelement <8 x float> %v, i32 0
> + %vecext1 = extractelement <8 x float> %v, i32 1
> + %vecext2 = extractelement <8 x float> %v, i32 2
> + %vecext3 = extractelement <8 x float> %v, i32 3
> + %vecext4 = extractelement <8 x float> %v, i32 4
> + %vecext5 = extractelement <8 x float> %v, i32 5
> + %vecext6 = extractelement <8 x float> %v, i32 6
> + %vecext7 = extractelement <8 x float> %v, i32 7
> + %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
> + %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
> + %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
> + %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
> + %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
> + %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
> + %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
> + store float %vecext0, float* %ptr, align 4
> + store float %vecext1, float* %arrayidx1, align 4
> + store float %vecext2, float* %arrayidx2, align 4
> + store float %vecext3, float* %arrayidx3, align 4
> + store float %vecext4, float* %arrayidx4, align 4
> + store float %vecext5, float* %arrayidx5, align 4
> + store float %vecext6, float* %arrayidx6, align 4
> + store float %vecext7, float* %arrayidx7, align 4
> + ret void
> +
> +; CHECK-LABEL: merge_vec_element_store
> +; CHECK: vmovups
> +; CHECK-NEXT: vzeroupper
> +; CHECK-NEXT: retq
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Alexey Samsonov
vonosmas at gmail.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141230/5ebeb5ce/attachment.html>
More information about the llvm-commits
mailing list