[llvm-dev] Question about supporting zext on IVUsers and LSR

Jingu Kang via llvm-dev llvm-dev at lists.llvm.org
Thu Nov 25 06:37:51 PST 2021


Hi All,

I am looking at a simple example as below.

; Module header: data layout string and target triple (AArch64 Linux).
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Aggregate that @test reads through %obj. Field index 6 is the
; [274 x %struct.match] array indexed by the getelementptr in @test.
%struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] }
; First member of %struct.base_s (field index 0).
%struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] }
; Element type of field 6: two i32 members, so 8 bytes per element.
%struct.match = type { i32, i32 }
%struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] }
%struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] }

; Walks field 6 of %obj (the [274 x %struct.match] array) starting at index 0
; and returns the index of the first element whose member 0 is NOT
; unsigned-less-than %len. The loop has no bound check on the index.
define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
entry:
  br label %while.cond

while.cond:                                       ; preds = %while.cond, %entry
  ; 32-bit induction variable: 0 on entry, %inc on the back edge.
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
  ; The i32 induction variable is zero-extended to i64 to serve as the GEP index.
  %idxprom = zext i32 %i.0 to i64
  ; Address of member 0 of element %idxprom within field 6 of %obj.
  %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0
  %0 = load i32, i32* %len1, align 4
  ; Continue while the loaded value is unsigned-less-than %len.
  %cmp = icmp ult i32 %0, %len
  ; i32 add without nuw/nsw flags.
  %inc = add i32 %i.0, 1
  br i1 %cmp, label %while.cond, label %while.end

while.end:                                        ; preds = %while.cond
  ; %i.0 is the pre-increment index of the element that terminated the loop.
  ret i32 %i.0
}

I expected the LSR pass to extract the loop-invariant part of `%len1 = getelementptr` and hoist it to the preheader. That would create a new IV for the loop-dependent part of the GEP inside the loop, which `%0 = load` could then use. However, it looks like `IVUsers` does not process the `%idxprom = zext`. I can see that `SCEVAddRecExpr` and `SCEVAddExpr` are handled in the `isInteresting` function. It seems the LSR pass also does not handle the `zext` for `IVChain`. If I remove the `%idxprom = zext` manually from the above example, I can see that LSR works as expected. Does anyone know why `zext` is not supported in IVUsers and LSR? Does it make it difficult for LSR to construct formulas and compare them? If I missed something, please let me know.

For reference, the assembly output of the above example with `-O3` is as below.

// AArch64 output for @test (i32 induction variable + zext), built with -O3.
// In:  w0 = len, x1 = obj
// Out: w0 = index of the first element whose loaded value is not
//      unsigned-lower than len
// The loop body is 5 instructions: the element address is recomputed from
// the zero-extended 32-bit counter on every iteration.
test:
        mov     w8, w0                  // w8 = len; w0 is reused as the counter
        mov     w0, #-1                 // start at -1 so the first increment gives 0
.LBB0_1:
        add     w0, w0, #1              // 32-bit counter increment
        add     x9, x1, w0, uxtw #3     // x9 = obj + zext(counter) * 8
        ldr     w9, [x9, #724]          // load the i32 at byte offset 724 from x9
        cmp     w9, w8
        b.lo    .LBB0_1                 // loop while loaded value <u len
        ret

If I remove the `zext`, the output is as below, and the loop has one fewer instruction than in the output above.

// AArch64 output for the @test variant without the zext (i64 induction variable).
// In:  w0 = len, x1 = obj
// Out: x0 = index of the first element whose loaded value is not
//      unsigned-lower than len
// The loop body is 4 instructions: the address is kept in x9 and advanced
// by the post-indexed load.
test:
               // x9 = obj + 724, computed once before the loop
               add        x9, x1, #724
               // counter starts at -1 so the first increment gives 0
               mov       x8, #-1
.LBB0_1:
               // load the i32 at [x9], then advance x9 by 8
               ldr          w10, [x9], #8
               add        x8, x8, #1
               cmp       w10, w0
               // loop while loaded value <u len
               b.lo        .LBB0_1
               // the counter is the return value
               mov       x0, x8
               ret

The IR code, in which the `zext` is removed, is as below.

; Module header: data layout string and target triple (AArch64 Linux).
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Aggregate that @test reads through %obj. Field index 6 is the
; [274 x %struct.match] array indexed by the getelementptr in @test.
%struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] }
; First member of %struct.base_s (field index 0).
%struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] }
; Element type of field 6: two i32 members, so 8 bytes per element.
%struct.match = type { i32, i32 }
%struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] }
%struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] }

; Hand-edited variant of @test: the induction variable and the return type are
; widened from i32 to i64 so that no zext is needed for the GEP index.
; Each commented-out line is the original i32 form of the line that follows it.
; Walks field 6 of %obj from index 0 and returns the index of the first element
; whose member 0 is NOT unsigned-less-than %len.
;define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
define i64 @test(i32 %len, %struct.base_s* nocapture readonly %obj) {
entry:
  br label %while.cond

while.cond:                                       ; preds = %while.cond, %entry
;  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
  ; 64-bit induction variable: 0 on entry, %inc on the back edge.
  %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.cond ]
;  %idxprom = zext i32 %i.0 to i64
;  %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0
  ; The i64 induction variable is used directly as the GEP index.
  %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %i.0, i32 0
  %0 = load i32, i32* %len1, align 4
  ; Continue while the loaded value is unsigned-less-than %len.
  %cmp = icmp ult i32 %0, %len
;  %inc = add i32 %i.0, 1
  %inc = add i64 %i.0, 1
  br i1 %cmp, label %while.cond, label %while.end

while.end:                                        ; preds = %while.cond
;  ret i32 %i.0
  ; %i.0 is the pre-increment index of the element that terminated the loop.
  ret i64 %i.0
}

Thanks
JinGu Kang
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211125/44585c82/attachment.html>


More information about the llvm-dev mailing list