[PATCH] D92668: [SLP]Merge reorder and reuse shuffles.
Jordan Rupprecht via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 30 16:27:58 PST 2020
rupprecht added a comment.
Finally got a reasonably sized reduction. It's probably so long because I'm just using clang to build, and if it gets smaller then clang optimizes too much of it away; if I used opt to control the passes I could probably get it smaller. Anyway, hope this is good enough to demonstrate the issue:
; ModuleID = './repro.ll'
; Module-level header and named struct types for the reduced reproducer.
source_filename = "repro.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; %struct.widget wraps a single %struct.baz; @wombat reads the i32 fields at
; indices 1 and 2 of that inner %struct.baz.
%struct.widget = type { %struct.baz }
%struct.baz = type { i32 (...)**, i32, i32, i32, i32, i8* }
; Field index 11 of %struct.snork is the %struct.spam* that @wombat loads and
; stores through.
%struct.snork = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8*, %struct.spam*, i8*, i8* }
; Field indices 3..6 of %struct.spam are the four i32 slots written by @wombat.
%struct.spam = type { i8, i8, i8, i32, i32, i32, i32 }
; %struct.zot and %struct.wombat appear only as pointer parameter types in the
; bitcast call signatures inside @wombat.
%struct.zot = type { i8 }
%struct.wombat = type { i8 }
; Reduced reproducer function.
;
; Calls @quux and @wobble (both via invoke, sharing the cleanup landing pad
; %bb47), then converts four floats to i32, bounds each one, and stores the
; results into i32 fields 3..6 of the %struct.spam reached through field 11 of
; the local %struct.snork.
;
; Each of the four lanes computes:
;   v = fptosi(float)
;   result = (v < 0) ? 0 : ((bound > v) ? v : bound - 1)
; where `bound` alternates between %tmp12 and %tmp13 (lanes 0/2 use %tmp12,
; lanes 1/3 use %tmp13).
define void @wombat(%struct.widget* %arg) local_unnamed_addr align 32 personality i8* undef {
bb:
; Four float stack slots passed by pointer to @wobble and read back in %bb14.
%tmp = alloca float, align 4
%tmp1 = alloca float, align 4
%tmp2 = alloca float, align 4
%tmp3 = alloca float, align 4
%tmp4 = alloca %struct.snork, align 8
; %tmp7 addresses inner field 2 and %tmp8 inner field 1 of %arg's %struct.baz,
; i.e. two adjacent i32 fields, with %tmp8 being the lower-indexed one.
%tmp7 = getelementptr inbounds %struct.widget, %struct.widget* %arg, i64 0, i32 0, i32 2
%tmp8 = getelementptr inbounds %struct.widget, %struct.widget* %arg, i64 0, i32 0, i32 1
; Address of the %struct.spam* member (field 11) of the local %struct.snork.
%tmp9 = getelementptr inbounds %struct.snork, %struct.snork* %tmp4, i64 0, i32 11
; @quux is declared as void () but is called through a bitcast with two
; pointer arguments.
invoke void bitcast (void ()* @quux to void (%struct.snork*, %struct.zot*)*)(%struct.snork* %tmp4, %struct.zot* null)
to label %bb11 unwind label %bb47
bb11: ; preds = %bb
; The two bounds: %tmp12 from field 2, %tmp13 from field 1.
%tmp12 = load i32, i32* %tmp7, align 4
%tmp13 = load i32, i32* %tmp8, align 8
; @wobble is declared as void () but is called through a bitcast; it receives
; pointers to the four float slots.
invoke void bitcast (void ()* @wobble to void (%struct.wombat*, float*, float*, float*, float*)*)(%struct.wombat* undef, float* %tmp, float* %tmp1, float* %tmp2, float* %tmp3)
to label %bb14 unwind label %bb47
bb14: ; preds = %bb11
%tmp15 = load float, float* %tmp, align 4
%tmp16 = load float, float* %tmp1, align 4
%tmp17 = load float, float* %tmp2, align 4
%tmp18 = load float, float* %tmp3, align 4
; Destination struct pointer read from the local %struct.snork.
%tmp20 = load %struct.spam*, %struct.spam** %tmp9, align 8
; Lane 0: source %tmp15, bound %tmp12, destination field 3.
%tmp21 = add nsw i32 %tmp12, -1
%tmp22 = fptosi float %tmp15 to i32
%tmp23 = icmp slt i32 %tmp22, 0
%tmp24 = icmp sgt i32 %tmp12, %tmp22
%tmp25 = select i1 %tmp24, i32 %tmp22, i32 %tmp21
%tmp26 = select i1 %tmp23, i32 0, i32 %tmp25
%tmp27 = getelementptr inbounds %struct.spam, %struct.spam* %tmp20, i64 0, i32 3
store i32 %tmp26, i32* %tmp27, align 8
; Lane 1: source %tmp16, bound %tmp13, destination field 4.
%tmp28 = add nsw i32 %tmp13, -1
%tmp29 = fptosi float %tmp16 to i32
%tmp30 = icmp slt i32 %tmp29, 0
%tmp31 = icmp sgt i32 %tmp13, %tmp29
%tmp32 = select i1 %tmp31, i32 %tmp29, i32 %tmp28
%tmp33 = select i1 %tmp30, i32 0, i32 %tmp32
%tmp34 = getelementptr inbounds %struct.spam, %struct.spam* %tmp20, i64 0, i32 4
store i32 %tmp33, i32* %tmp34, align 4
; Lane 2: source %tmp17, reuses bound %tmp12 (and %tmp21), destination field 5.
%tmp35 = fptosi float %tmp17 to i32
%tmp36 = icmp slt i32 %tmp35, 0
%tmp37 = icmp sgt i32 %tmp12, %tmp35
%tmp38 = select i1 %tmp37, i32 %tmp35, i32 %tmp21
%tmp39 = select i1 %tmp36, i32 0, i32 %tmp38
%tmp40 = getelementptr inbounds %struct.spam, %struct.spam* %tmp20, i64 0, i32 5
store i32 %tmp39, i32* %tmp40, align 8
; Lane 3: source %tmp18, reuses bound %tmp13 (and %tmp28), destination field 6.
%tmp41 = fptosi float %tmp18 to i32
%tmp42 = icmp slt i32 %tmp41, 0
%tmp43 = icmp sgt i32 %tmp13, %tmp41
%tmp44 = select i1 %tmp43, i32 %tmp41, i32 %tmp28
%tmp45 = select i1 %tmp42, i32 0, i32 %tmp44
%tmp46 = getelementptr inbounds %struct.spam, %struct.spam* %tmp20, i64 0, i32 6
store i32 %tmp45, i32* %tmp46, align 4
ret void
; Shared unwind destination for both invokes; it only runs cleanup and returns.
bb47: ; preds = %bb11, %bb
%tmp48 = landingpad { i8*, i32 }
cleanup
ret void
}
; External callees. Both are declared with no parameters; @wombat calls them
; through bitcasts to function types that take pointer arguments.
declare void @quux() unnamed_addr
declare void @wobble() local_unnamed_addr
Building with `clang -c repro.ll -O3 -o optimized.ll -fno-discard-value-names -S -emit-llvm`, before this patch we had:
%1 = load <2 x i32>, <2 x i32>* %0, align 8
%reorder_shuffle = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
...
%shuffle = shufflevector <2 x i32> %reorder_shuffle, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
...
%2 = add nsw <2 x i32> %reorder_shuffle, <i32 -1, i32 -1>
And after, we have:
%1 = load <2 x i32>, <2 x i32>* %0, align 8
...
%shuffle = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
...
%2 = add nsw <2 x i32> %1, <i32 -1, i32 -1>
As with the previous snippet,
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D92668/new/
https://reviews.llvm.org/D92668
More information about the llvm-commits
mailing list