[PATCH] D71828: [InstCombine] Convert vector store to scalar store if only one element updated
Qiu Chaofan via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 00:25:04 PST 2019
qiucf created this revision.
qiucf added reviewers: spatel, nemanjai, PowerPC, andrewrk, fhahn, efriedma, bogner.
qiucf added a project: LLVM.
Herald added a subscriber: llvm-commits.
This is a simplified version of https://reviews.llvm.org/D70223. Since we can at least be confident that a single-element store won't be worse than a vector store, it's clearer to do this transform in `InstCombine`. There is also already logic that transforms a `shufflevector` affecting only one element into an `insertelement`, so we only need to handle the `insertelement` case.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D71828
Files:
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
llvm/test/Transforms/InstCombine/single-element-store.ll
Index: llvm/test/Transforms/InstCombine/single-element-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/single-element-store.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Basic case: a vector is loaded from %q, element 3 is replaced by %s, and the
+; whole vector is stored back to %q. The expected output is an address
+; computation for element 3 followed by a scalar i8 store of %s.
+; NOTE(review): the expected scalar store carries `align 16`, but an i8 at byte
+; offset 3 from a 16-byte-aligned base is only guaranteed 1-byte alignment;
+; confirm the expected alignment.
+; CHECK-LABEL: @insert_store
+; CHECK: %0 = getelementptr <16 x i8>, <16 x i8>* %q, i64 0, i64 3
+; CHECK-NEXT: store i8 %s, i8* %0, align 16
+define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %q
+ %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
+ store <16 x i8> %vecins, <16 x i8>* %q
+ ret void
+}
+
+; Shuffle case: the shuffle mask <0, 5, 2, 3> takes lanes 0, 2 and 3 from the
+; loaded vector %0 and lane 1 from %1 (which is %0 with %b inserted at index
+; 1), so the stored value differs from the loaded one only in element 1. The
+; expected output is a scalar i32 store of %b to element 1 of %a.
+; NOTE(review): the expected scalar store carries `align 16`, but an i32 at
+; byte offset 4 from a 16-byte-aligned base is only guaranteed 4-byte
+; alignment; confirm the expected alignment.
+; CHECK-LABEL: @single_shuffle_store
+; CHECK: %0 = getelementptr <4 x i32>, <4 x i32>* %a, i64 0, i64 1
+; CHECK-NEXT: store i32 %b, i32* %0, align 16
+define void @single_shuffle_store(<4 x i32>* %a, i32 %b) {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %a
+ %1 = insertelement <4 x i32> %0, i32 %b, i32 1
+ %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ store <4 x i32> %2, <4 x i32>* %a
+ ret void
+}
+
+; Negative case: the store is volatile, so it must not be narrowed. The
+; expected output still contains the insertelement and the volatile store of
+; the full vector.
+; CHECK-LABEL: @volatile_update
+; CHECK: %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
+; CHECK-NEXT: store volatile <16 x i8> %vecins, <16 x i8>* %q
+define void @volatile_update(<16 x i8>* %q, i8 zeroext %s) {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %q
+ %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
+ store volatile <16 x i8> %vecins, <16 x i8>* %q
+ ret void
+}
Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1170,6 +1170,41 @@
return false;
}
+// Narrow a vector store to a scalar store when the stored vector is the value
+// just loaded from the same address with exactly one element replaced:
+//   %0 = load <4 x i32>, <4 x i32>* %a
+//   %1 = insertelement <4 x i32> %0, i32 %b, i32 1
+//   store <4 x i32> %1, <4 x i32>* %a
+// becomes a store of only the replaced element:
+//   %0 = bitcast <4 x i32>* %a to i32*
+//   %1 = getelementptr i32, i32* %0, i32 1
+//   store i32 %b, i32* %1
+//
+// The fold is only performed when all of the following hold:
+//  * the load and the store are simple (neither volatile nor atomic);
+//  * the load reads from exactly the pointer the store writes to;
+//  * both are in the same basic block and nothing between them may write to
+//    memory, so the untouched elements in memory still equal the loaded ones;
+//  * the insert index is a constant integer within the vector bounds;
+//  * the element type has no padding, so element I lives at byte offset
+//    I * sizeof(element) inside the vector.
+//
+// Returns &SI (rewritten in place) on success, nullptr otherwise.
+static Instruction *foldSingleElementStore(InstCombiner &IC, StoreInst &SI) {
+  // Volatile and atomic stores must keep their original access width.
+  if (!SI.isSimple())
+    return nullptr;
+
+  Instruction *Source;
+  Value *NewElement;
+  ConstantInt *Idx;
+  // Require a ConstantInt index: undef or constant-expression indices cannot
+  // be turned into a well-defined element address.
+  if (!match(SI.getValueOperand(),
+             m_InsertElement(m_Instruction(Source), m_Value(NewElement),
+                             m_ConstantInt(Idx))))
+    return nullptr;
+
+  // Only do the fold when the load is not volatile/atomic.
+  auto *Load = dyn_cast<LoadInst>(Source);
+  if (!Load || !Load->isSimple())
+    return nullptr;
+
+  // The vector must be written back to the location it was read from;
+  // otherwise the remaining elements of the destination would be left stale.
+  Value *StorePtr = SI.getPointerOperand();
+  if (Load->getPointerOperand() != StorePtr)
+    return nullptr;
+
+  // An out-of-range index makes the insertelement result undefined; do not
+  // turn that into an out-of-bounds store.
+  auto *VecTy = cast<VectorType>(SI.getValueOperand()->getType());
+  if (Idx->getValue().uge(VecTy->getNumElements()))
+    return nullptr;
+
+  // Elements whose bit size differs from their allocation size (e.g. i1) are
+  // not individually addressable at I * sizeof(element).
+  Type *EltTy = NewElement->getType();
+  const DataLayout &DL = IC.getDataLayout();
+  if (DL.getTypeSizeInBits(EltTy) != DL.getTypeAllocSizeInBits(EltTy))
+    return nullptr;
+
+  // Memory must be unchanged between the load and the store. Be conservative:
+  // same block only, and no instruction in between may write to memory.
+  // Running off the end of the block (I == nullptr) means the load does not
+  // precede the store, so give up in that case as well.
+  if (Load->getParent() != SI.getParent())
+    return nullptr;
+  for (Instruction *I = Load->getNextNode(); I != &SI; I = I->getNextNode())
+    if (!I || I->mayWriteToMemory())
+      return nullptr;
+
+  // The element is only as aligned as the vector alignment combined with its
+  // byte offset allows. An alignment of 0 means "ABI alignment of the type".
+  uint64_t VecAlign = SI.getAlignment();
+  if (VecAlign == 0)
+    VecAlign = DL.getABITypeAlignment(VecTy);
+  uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+  uint64_t EltOffset = Idx->getZExtValue() * EltSize;
+
+  // Keep the element pointer in the store's address space.
+  Type *EltPtrTy = EltTy->getPointerTo(SI.getPointerAddressSpace());
+  Value *EltPtr = IC.Builder.CreatePointerCast(StorePtr, EltPtrTy);
+  Value *GEP = IC.Builder.CreateGEP(EltTy, EltPtr, Idx);
+
+  SI.setOperand(0, NewElement);
+  SI.setOperand(1, GEP);
+  SI.setAlignment(MaybeAlign(MinAlign(VecAlign, EltOffset)));
+  return &SI;
+}
+
+
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic
// stores here but it isn't clear that this is important.
@@ -1394,6 +1429,9 @@
// FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
if (!SI.isUnordered()) return nullptr;
+ if (Instruction *NewSI = foldSingleElementStore(*this, SI))
+ return NewSI;
+
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71828.235102.patch
Type: text/x-patch
Size: 3576 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191223/a52d0022/attachment.bin>
More information about the llvm-commits
mailing list