[PATCH] D89479: [SimplifyCFG] Be more conservative when speculating in loops. (WIP)
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 15 13:45:09 PDT 2020
fhahn added a comment.
In D89479#2333065 <https://reviews.llvm.org/D89479#2333065>, @spatel wrote:
> Another reason that we would likely want a finer-grain solution: recent AMD implementations appear to have full-speed lzcnt (1 cycle and full throughput according to Agner's tables for Jaguar and Ryzen).
Yeah, the issue here is really the throughput/number of execution units available together with the number of cycles. I guess we could ask TTI about that and get roughly sane results?
================
Comment at: llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll:416
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; ALL-NEXT: br i1 [[TOBOOL]], label [[COND_END]], label [[COND_TRUE:%.*]]
+; ALL: cond.true:
----------------
craig.topper wrote:
> Do you have a better example more like the loops you're seeing performance issues on?
>
> This one looks kind of silly since %x is loop invariant.
I can make it more complex. The original IR from the benchmark is below (I can also provide a runnable version, but it requires downloading some Swift libraries for macOS).
```
; @wobble: benchmark reproducer containing a loop with two conditionally
; executed llvm.ctlz.i8 calls (blocks bb35 and bb50).
;
; Returns 0 when the computed bound %tmp8 is zero; otherwise returns the number
; of loop iterations executed (an overflow-checked counter). Each iteration
; advances the offset %tmp24 by a per-iteration step %tmp55 and the loop exits
; once the offset is no longer (signed) less than %tmp8.
;
; All branch conditions inside the loop that select the path (%tmp12, %tmp4,
; %tmp19) are computed before the loop and are loop invariant; only the loaded
; byte tests (%tmp34, %tmp49) vary per iteration.
define hidden swiftcc i64 @wobble(i64 %arg, %struct.blam* %arg1) local_unnamed_addr #3 {
bb:
; Scratch buffer of two %struct.pluto values, only written in bb31.
%tmp = alloca <{ %struct.pluto, %struct.pluto }>, align 8
; The pointer argument is treated as a 64-bit integer carrying flag bits.
%tmp2 = ptrtoint %struct.blam* %arg1 to i64
; 2305843009213693952 == 2^61: %tmp4 is true when bit 61 of %tmp2 is clear.
%tmp3 = and i64 %tmp2, 2305843009213693952
%tmp4 = icmp eq i64 %tmp3, 0
; %tmp6 = bits 56..59 of %tmp2.
%tmp5 = lshr i64 %tmp2, 56
%tmp6 = and i64 %tmp5, 15
; 281474976710655 == 2^48 - 1: %tmp7 = low 48 bits of %arg.
%tmp7 = and i64 %arg, 281474976710655
; Loop bound: low 48 bits of %arg if bit 61 is clear, else bits 56..59 of %tmp2.
%tmp8 = select i1 %tmp4, i64 %tmp7, i64 %tmp6
; A zero bound skips the loop entirely and returns 0 via bb64.
%tmp9 = icmp eq i64 %tmp8, 0
br i1 %tmp9, label %bb64, label %bb10, !prof !16, !misexpect !17
; Loop preheader: everything computed here is loop invariant.
bb10: ; preds = %bb
; 1152921504606846976 == 2^60: %tmp12 is true when bit 60 of %tmp2 is clear.
%tmp11 = and i64 %tmp2, 1152921504606846976
%tmp12 = icmp eq i64 %tmp11, 0
%tmp13 = bitcast <{ %struct.pluto, %struct.pluto }>* %tmp to i8*
; 72057594037927935 == 2^56 - 1: %tmp14 = low 56 bits of %tmp2.
%tmp14 = and i64 %tmp2, 72057594037927935
; Addresses of the first field of each of the two %struct.pluto slots in %tmp.
%tmp15 = getelementptr inbounds <{ %struct.pluto, %struct.pluto }>, <{ %struct.pluto, %struct.pluto }>* %tmp, i64 0, i32 0, i32 0
%tmp16 = getelementptr inbounds <{ %struct.pluto, %struct.pluto }>, <{ %struct.pluto, %struct.pluto }>* %tmp, i64 0, i32 1, i32 0
; The same scratch buffer viewed as an array of %struct.barney.
%tmp17 = bitcast <{ %struct.pluto, %struct.pluto }>* %tmp to %struct.barney*
; %tmp19 is true when bit 60 of %arg is clear.
%tmp18 = and i64 %arg, 1152921504606846976
%tmp19 = icmp eq i64 %tmp18, 0
; 1152921504606846975 == 2^60 - 1: %tmp21 = (low 60 bits of %tmp2) + 32,
; later used as a base address in bb44.
%tmp20 = and i64 %tmp2, 1152921504606846975
%tmp21 = add nuw nsw i64 %tmp20, 32
br label %bb22
; Loop header. %tmp23 = iteration counter, %tmp24 = current offset.
bb22: ; preds = %bb59, %bb10
%tmp23 = phi i64 [ 0, %bb10 ], [ %tmp60, %bb59 ]
%tmp24 = phi i64 [ 0, %bb10 ], [ %tmp56, %bb59 ]
; Invariant dispatch: bit 60 of %tmp2 clear -> bb25, set -> bb26.
br i1 %tmp12, label %bb25, label %bb26
bb25: ; preds = %bb22
; Invariant dispatch: bit 61 of %tmp2 clear -> bb30, set -> bb31.
br i1 %tmp4, label %bb30, label %bb31
; Out-of-line path: the step comes from field 1 of @snork's result.
bb26: ; preds = %bb22
%tmp27 = shl i64 %tmp24, 16
%tmp28 = tail call swiftcc { i32, i64 } @snork(i64 %tmp27, i64 %arg, %struct.blam* %arg1)
%tmp29 = extractvalue { i32, i64 } %tmp28, 1
br label %bb54
bb30: ; preds = %bb25
; Invariant dispatch: bit 60 of %arg clear -> call @bar for the base address
; (bb41); otherwise use the precomputed %tmp21 directly (bb44).
br i1 %tmp19, label %bb41, label %bb44, !prof !16, !misexpect !18
; Path that reads the byte from the stack scratch buffer.
bb31: ; preds = %bb25
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %tmp13)
; Fill the buffer with %arg and the low 56 bits of %tmp2.
store i64 %arg, i64* %tmp15, align 8
store i64 %tmp14, i64* %tmp16, align 8
; Load an i8 from field 0 of element %tmp24 of the buffer viewed as
; %struct.barney (the layout of %struct.barney is not shown in this snippet).
%tmp32 = getelementptr inbounds %struct.barney, %struct.barney* %tmp17, i64 %tmp24, i32 0
%tmp33 = load i8, i8* %tmp32, align 1
; Signed compare against -1: true when the top bit of the byte is clear.
%tmp34 = icmp sgt i8 %tmp33, -1
br i1 %tmp34, label %bb39, label %bb35
; Conditionally executed ctlz #1: only reached when the byte's top bit is set.
bb35: ; preds = %bb31
; ctlz of the inverted byte == number of leading one bits in %tmp33.
; The i1 false flag requests a defined result even for a zero input.
%tmp36 = xor i8 %tmp33, -1
%tmp37 = tail call i8 @llvm.ctlz.i8(i8 %tmp36, i1 false), !range !19
%tmp38 = zext i8 %tmp37 to i64
br label %bb39
bb39: ; preds = %bb35, %bb31
; Step is 1 when the top bit was clear, else the leading-ones count.
%tmp40 = phi i64 [ 1, %bb31 ], [ %tmp38, %bb35 ]
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %tmp13)
br label %bb54
bb41: ; preds = %bb30
; Base address comes from field 0 of @bar's result.
%tmp42 = tail call swiftcc { i64, i64 } @bar(i64 %arg, %struct.blam* %arg1)
%tmp43 = extractvalue { i64, i64 } %tmp42, 0
br label %bb44
; Path that reads the byte through an integer-derived pointer.
bb44: ; preds = %bb41, %bb30
%tmp45 = phi i64 [ %tmp43, %bb41 ], [ %tmp21, %bb30 ]
%tmp46 = inttoptr i64 %tmp45 to %struct.barney*
; Same indexed i8 load as in bb31, but relative to %tmp46.
%tmp47 = getelementptr inbounds %struct.barney, %struct.barney* %tmp46, i64 %tmp24, i32 0
%tmp48 = load i8, i8* %tmp47, align 1
; Top bit clear -> step of 1 (the [ 1, %bb44 ] phi entry in bb54).
%tmp49 = icmp sgt i8 %tmp48, -1
br i1 %tmp49, label %bb54, label %bb50
; Conditionally executed ctlz #2: same computation as bb35 on %tmp48.
bb50: ; preds = %bb44
%tmp51 = xor i8 %tmp48, -1
%tmp52 = tail call i8 @llvm.ctlz.i8(i8 %tmp51, i1 false), !range !19
%tmp53 = zext i8 %tmp52 to i64
br label %bb54
; Loop latch, part 1: merge the step from all paths and advance.
bb54: ; preds = %bb50, %bb44, %bb39, %bb26
%tmp55 = phi i64 [ %tmp29, %bb26 ], [ %tmp40, %bb39 ], [ 1, %bb44 ], [ %tmp53, %bb50 ]
; Next offset = current offset + step.
%tmp56 = add i64 %tmp55, %tmp24
; Increment the iteration counter with signed-overflow detection;
; overflow branches to the trap block bb66.
%tmp57 = tail call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %tmp23, i64 1)
%tmp58 = extractvalue { i64, i1 } %tmp57, 1
br i1 %tmp58, label %bb66, label %bb59, !prof !16, !misexpect !18
; Loop latch, part 2: continue while the next offset is (signed) below %tmp8.
bb59: ; preds = %bb54
%tmp60 = extractvalue { i64, i1 } %tmp57, 0
%tmp61 = icmp slt i64 %tmp56, %tmp8
br i1 %tmp61, label %bb22, label %bb62, !prof !20, !misexpect !17
; Loop exit: the result is the incremented counter (%tmp23 + 1).
bb62: ; preds = %bb59
%tmp63 = extractvalue { i64, i1 } %tmp57, 0
br label %bb64
; Common return: 0 if the loop was skipped, else the iteration count.
bb64: ; preds = %bb62, %bb
%tmp65 = phi i64 [ 0, %bb ], [ %tmp63, %bb62 ]
ret i64 %tmp65
; Counter overflow: empty side-effecting inline asm followed by a trap.
bb66: ; preds = %bb54
tail call void asm sideeffect "", "n"(i32 0) #5
tail call void @llvm.trap()
unreachable
}
```
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D89479/new/
https://reviews.llvm.org/D89479
More information about the llvm-commits
mailing list