[llvm-dev] Question about supporting zext on IVUsers and LSR
Jingu Kang via llvm-dev
llvm-dev at lists.llvm.org
Thu Nov 25 06:37:51 PST 2021
Hi All,
I am looking at a simple example as below.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
%struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] }
%struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] }
%struct.match = type { i32, i32 }
%struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] }
%struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] }
define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
%idxprom = zext i32 %i.0 to i64
%len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0
%0 = load i32, i32* %len1, align 4
%cmp = icmp ult i32 %0, %len
%inc = add i32 %i.0, 1
br i1 %cmp, label %while.cond, label %while.end
while.end: ; preds = %while.cond
ret i32 %i.0
}
I expected the LSR pass to extract the loop-invariant part of `%len1 = getelementptr` and hoist it to the preheader. That would create a new IV for the loop-dependent part of the gep inside the loop, which `%0 = load` could then use. However, it looks like `IVUsers` does not process the `%idxprom = zext`. I can see that only `SCEVAddRecExpr` and `SCEVAddExpr` are handled in the `isInteresting` function. It seems the LSR pass also does not handle the `zext` for `IVChain`. If I manually remove the `%idxprom = zext` from the above example, I can see that LSR works as expected. Does anyone know why the `zext` is not supported in IVUsers and LSR? Does it make it difficult for LSR to construct formulas and compare them? If I missed something, please let me know.
For reference, the assembly output of the above example with `-O3` is shown below.
test:
mov w8, w0
mov w0, #-1
.LBB0_1:
add w0, w0, #1
add x9, x1, w0, uxtw #3
ldr w9, [x9, #724]
cmp w9, w8
b.lo .LBB0_1
ret
If I remove the `zext`, the output is as shown below, and the loop has one fewer instruction than in the output above.
test:
add x9, x1, #724
mov x8, #-1
.LBB0_1:
ldr w10, [x9], #8
add x8, x8, #1
cmp w10, w0
b.lo .LBB0_1
mov x0, x8
ret
The IR code, in which the `zext` is removed, is as below.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
%struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] }
%struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] }
%struct.match = type { i32, i32 }
%struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] }
%struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] }
;define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
define i64 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
; %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
%i.0 = phi i64 [ 0, %entry ], [ %inc, %while.cond ]
; %idxprom = zext i32 %i.0 to i64
; %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0
%len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %i.0, i32 0
%0 = load i32, i32* %len1, align 4
%cmp = icmp ult i32 %0, %len
; %inc = add i32 %i.0, 1
%inc = add i64 %i.0, 1
br i1 %cmp, label %while.cond, label %while.end
while.end: ; preds = %while.cond
; ret i32 %i.0
ret i64 %i.0
}
Thanks
JinGu Kang
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211125/44585c82/attachment.html>
More information about the llvm-dev
mailing list