[llvm-dev] Potentially unsafe loop optimization

Wed Feb 17 15:57:39 PST 2021

I'm seeing a case where an add instruction with "nuw" is moved to what
I think is an unsafe place and I'm wondering if there really is a bug
here.  This comes from the small Ada code:

PROCEDURE C26006A IS

        procedure C_abort with Import, Convention => C,
          External_Name => "abort";

        S1 : STRING (1..3) := "A 1";
        S2 : STRING (1..3) := "A 2";

BEGIN
        FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP
                S1 (2) := C;
                S2 (2) := C;
                if S1 = S2 then
                   C_Abort;
                end if;
        END LOOP;
END C26006A;

and the issue is the loop, which is from (interpreting the i8 as unsigned)
0 to 255 (-1).

The loop end test looks like:

loop.cond.iter:                                   ; preds = %8
  %6 = load i8, i8* %c, align 1, !tbaa !5
  %loop.iter.cond = icmp eq i8 %6, -1
  br i1 %loop.iter.cond, label %loop.exit, label %loop.iter

loop.iter:                                        ; preds = %loop.cond.iter
  %next.loop.var = add nuw i8 %6, 1
  store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
  br label %loop.cond

This looks correct.  We do the add after the conditional branch, so
we'll never see it overflow.

But when we optimize with -O2 on this, we get:

loop.cond.iter:                                   ; preds = %loop.cond, %3
  %loop.iter.cond = icmp eq i8 %c.0, -1
  %next.loop.var = add nuw i8 %c.0, 1
  br i1 %loop.iter.cond, label %loop.exit, label %loop.cond

But now the add is done unconditionally, whatever the result of the
comparison.  That means that in the case where the icmp is true, we
execute an undefined instruction.  When we generate code from this, the
code generator notices that, eliminates the test, and makes this an
an conditional jump back, with the relevant code being:

.LBB0_3:                                # %loop.cond.iter
                                        #   in Loop: Header=BB0_1 Depth=1
        incb    %bl
.LBB0_1:                                # %loop.cond
                                        # =>This Inner Loop Header: Depth=1
        movb    %bl, 17(%rsp)
        movb    %bl, 1(%rsp)
        movzwl  (%rsp), %eax
        xorw    16(%rsp), %ax
        movzbl  2(%rsp), %ecx
        xorb    18(%rsp), %cl
        movzbl  %cl, %ecx
        orw     %ax, %cx
        jne     .LBB0_3
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        callq   abort
        jmp     .LBB0_3

I think the initial IR is correct, but the loop optimization here is
unsafe: if it wants to promote the add instruction, it needs to remove
any "nsw" or "nuw".  Does that sound correct?

The full .ll is below:

; ModuleID = 'c26006a.adb'
source_filename = "c26006a.adb"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_ada_c26006a() {
entry:
  %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4
  %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4
  %c = alloca i8, align 1
  %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]*
  store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0
  %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]*
  store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0
  store i8 0, i8* %c, align 1, !tbaa !5
  br i1 true, label %loop.cond, label %loop.exit

loop.cond:                                        ; preds = %loop.iter, %entry
  %2 = load i8, i8* %c, align 1, !tbaa !5
  %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1
  store i8 %2, i8* %3, align 1, !tbaa !8
  %4 = load i8, i8* %c, align 1, !tbaa !5
  %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1
  store i8 %4, i8* %5, align 1, !tbaa !8
  br i1 false, label %rhs.has.0.dim, label %normal.tests

loop.iter:                                        ; preds = %loop.cond.iter
  %next.loop.var = add nuw i8 %6, 1
  store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
  br label %loop.cond

loop.exit:                                        ; preds = %loop.cond.iter, %entry
  ret void

loop.cond.iter:                                   ; preds = %8
  %6 = load i8, i8* %c, align 1, !tbaa !5
  %loop.iter.cond = icmp eq i8 %6, -1
  br i1 %loop.iter.cond, label %loop.exit, label %loop.iter

7:                                                ; preds = %9, %rhs.has.0.dim
  call void @abort()
  br label %8

8:                                                ; preds = %7, %9, %normal.tests
  br label %loop.cond.iter

rhs.has.0.dim:                                    ; preds = %loop.cond
  br i1 false, label %7, label %normal.tests

normal.tests:                                     ; preds = %rhs.has.0.dim, %loop.cond
  br i1 true, label %9, label %8

9:                                                ; preds = %normal.tests
  %10 = bitcast [3 x i8]* %1 to i8*
  %11 = bitcast [3 x i8]* %0 to i8*
  %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3)
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %7, label %8
}

; Function Attrs: nounwind
declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture nonnull readonly, i64) #0

declare void @abort()

attributes #0 = { nounwind }

!0 = !{!1, !1, i64 0, i64 3}
!1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1, i64 1, !3, i64 2, i64 1}
!2 = !{!"Ada Root"}
!3 = !{!4, i64 1, !"character#T0"}
!4 = !{!2, i64 1, !"character#TN"}
!5 = !{!6, !6, i64 0, i64 1}
!6 = !{!7, i64 1, !"T4b#T4"}
!7 = !{!4, i64 1, !"T4b#TN"}
!8 = !{!3, !3, i64 0, i64 1}