[llvm-dev] A volatile question.

Peter McKinna via llvm-dev
Thu Nov 21 20:58:26 PST 2019


  I have a small procedure that works without optimisations but
doesn't with opt -O3. It's written in Modula3 and tests the exception
handling of the language. M3 uses traditional setjmp - longjmp for
exception handling.
In the example, both raised and finally should be true.

(*-------------------- Test RAISE in FINALLY with another RAISE on stack.

  VAR raised := FALSE;  finally := FALSE;
        raised := TRUE;
        RAISE E;
        finally := TRUE;
        RAISE E;
      Test.check (FALSE); <*NOWARN*>
    EXCEPT E =>
      Test.checkM(raised," P25 raised");
      Test.checkM(finally," P25 finally");
  END P25;

Here's the generated IR - the finally block gets turned into a nested
procedure, which I haven't shown, hence the display. And setjmp has the
returns_twice attribute, which doesn't seem to make any difference.

define void @Main__P25() #0 !dbg !303 {
  %raised = alloca i8, align 1
  %finally = alloca i8, align 1
  %tmp.181 = alloca i8*, align 8
  %tmp.182 = alloca i8*, align 8
  %tmp.183 = alloca i64, align 8
  %tmp.184 = alloca %struct.2, align 8
  %tmp.185 = alloca %struct.3, align 8
  %__Display = alloca [1 x i8*]
  %__Display.i8pp = bitcast [1 x i8*]* %__Display to i8**
  %__NewDisplaySlot.i8pp = getelementptr inbounds i8*, i8**
%__Display.i8pp, i64 0
  store i8* %finally, i8** %__NewDisplaySlot.i8pp
  br label %second, !dbg !304

second:                                           ; preds = %entry
  call void @llvm.dbg.declare(metadata i8* %raised, metadata !305, metadata
!DIExpression()), !dbg !307
  call void @llvm.dbg.declare(metadata i8* %finally, metadata !308,
metadata !DIExpression()), !dbg !307
  store i8 0, i8* %finally, align 1, !dbg !309
  store i8 0, i8* %raised, align 1, !dbg !309
  store i8 0, i8* %finally, align 1, !dbg !309
  %v.58 = load i64, i64* @m3_jmpbuf_size, align 8, !dbg !309
  store i64 %v.58, i64* %tmp.183, align 8, !dbg !309
  %v.183 = load i64, i64* %tmp.183, align 8, !dbg !309
  %umul = mul nuw i64 2, %v.183, !dbg !309
  %jmpbuf_size = alloca i8, i64 %umul, !dbg !309
  store i8* %jmpbuf_size, i8** %tmp.181, align 8, !dbg !309
  %v.181 = load i8*, i8** %tmp.181, align 8, !dbg !309
  %loophole-addr_word = ptrtoint i8* %v.181 to i64, !dbg !309
  %v.1831 = load i64, i64* %tmp.183, align 8, !dbg !309
  %uadd = add nuw i64 %loophole-addr_word, %v.1831, !dbg !309
  %loophole-addr_word2 = inttoptr i64 %uadd to i8*, !dbg !309
  store i8* %loophole-addr_word2, i8** %tmp.182, align 8, !dbg !309
  br label %label_121, !dbg !310

label_121:                                        ; preds = %second
  %store_base.i8p = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
  %store_dest.i8p = getelementptr inbounds i8, i8* %store_base.i8p, i64 16,
!dbg !310
  %store_dest = bitcast i8* %store_dest.i8p to i8**, !dbg !310
  store i8* getelementptr inbounds (i8, i8* bitcast (%M_Const_struct*
@M_Const to i8*), i64 136), i8** %store_dest, align 8, !dbg !310
  %store_base.i8p3 = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
  %store_dest.i8p4 = getelementptr inbounds i8, i8* %store_base.i8p3, i64
8, !dbg !310
  %store_dest5 = bitcast i8* %store_dest.i8p4 to i64*, !dbg !310
  store i64 0, i64* %store_dest5, align 8, !dbg !310
  %pop_toadr = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
  call void @RTHooks__PushEFrame(i8* %pop_toadr), !dbg !310
  %v.1816 = load i8*, i8** %tmp.181, align 8, !dbg !310
  %store_base.i8p7 = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
  %store_dest.i8p8 = getelementptr inbounds i8, i8* %store_base.i8p7, i64
96, !dbg !310
  %store_dest9 = bitcast i8* %store_dest.i8p8 to i8**, !dbg !310
  store i8* %v.1816, i8** %store_dest9, align 8, !dbg !310
  %v.18110 = load i8*, i8** %tmp.181, align 8, !dbg !310
  %result = call i64 @_setjmp(i8* %v.18110), !dbg !310
  %icmp = icmp ne i64 %result, 0, !dbg !310
  br i1 %icmp, label %if_122, label %else_122, !dbg !310

else_122:                                         ; preds = %label_121
  %store_base.i8p11 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
  %store_dest.i8p12 = getelementptr inbounds i8, i8* %store_base.i8p11, i64
16, !dbg !311
  %store_dest13 = bitcast i8* %store_dest.i8p12 to i8**, !dbg !311
  store i8* bitcast (void (i8*)* @Main_M3_LINE_451 to i8*), i8**
%store_dest13, align 8, !dbg !311
  %__static_link_from_display = bitcast i8** %__Display.i8pp to i8*, !dbg
  %store_base.i8p14 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
  %store_dest.i8p15 = getelementptr inbounds i8, i8* %store_base.i8p14, i64
24, !dbg !311
  %store_dest16 = bitcast i8* %store_dest.i8p15 to i8**, !dbg !311
  store i8* %__static_link_from_display, i8** %store_dest16, align 8, !dbg
  br label %label_123, !dbg !311

label_123:                                        ; preds = %else_122
  %store_base.i8p17 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
  %store_dest.i8p18 = getelementptr inbounds i8, i8* %store_base.i8p17, i64
8, !dbg !311
  %store_dest19 = bitcast i8* %store_dest.i8p18 to i64*, !dbg !311
  store i64 3, i64* %store_dest19, align 8, !dbg !311
  %pop_toadr20 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
  call void @RTHooks__PushEFrame(i8* %pop_toadr20), !dbg !311
  store i8 1, i8* %raised, align 1, !dbg !312
  call void @RTHooks__Raise(i8* bitcast (%M_Const_struct* @M_Const to i8*),
i8* null, i8* bitcast (%M_Main_struct* @M_Main to i8*), i64 449), !dbg !313
  br label %label_124, !dbg !313

label_124:                                        ; preds = %label_123
  br label %if_122, !dbg !314

if_122:                                           ; preds = %label_124,
  %v.27 = load i8, i8* %raised, align 1, !dbg !315
  %zext = zext i8 %v.27 to i64, !dbg !315
  %pop_trunc = trunc i64 %zext to i8, !dbg !315
  call void @Test__checkM(i8 %pop_trunc, i8* getelementptr inbounds (i8,
i8* bitcast (%M_Const_struct* @M_Const to i8*), i64 280)), !dbg !315

  Here is a portion of the optimised IR.


  %result = call i64 @_setjmp(i8* nonnull %jmpbuf_size), !dbg !465
  %icmp = icmp eq i64 %result, 0, !dbg !465
  br i1 %icmp, label %else_122, label %if_122, !dbg !465

else_122:                                         ; preds = %entry
  %tmp.1853.sub = getelementptr inbounds [40 x i8], [40 x i8]* %tmp.1853,
i64 0, i64 0
  %store_dest.i8p12 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 16, !dbg !466
  %store_dest13 = bitcast i8* %store_dest.i8p12 to i8**, !dbg !466
  store i8* bitcast (void (i8*)* @Main_M3_LINE_451 to i8*), i8**
%store_dest13, align 8, !dbg !466
  %store_dest.i8p15 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 24, !dbg !466
  %0 = bitcast i8* %store_dest.i8p15 to i8***, !dbg !466
  store i8** %__Display, i8*** %0, align 8, !dbg !466
  %store_dest.i8p18 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 8, !dbg !466
  %store_dest19 = bitcast i8* %store_dest.i8p18 to i64*, !dbg !466
  store i64 3, i64* %store_dest19, align 8, !dbg !466
  call void @RTHooks__PushEFrame(i8* nonnull %tmp.1853.sub), !dbg !466
  call void @llvm.dbg.value(metadata i8 1, metadata !460, metadata
!DIExpression()), !dbg !462
  call void @RTHooks__Raise(i8* bitcast (%M_Const_struct* @M_Const to i8*),
i8* null, i8* bitcast (%M_Main_struct* @M_Main to i8*), i64 449), !dbg !467
  br label %if_122, !dbg !468

if_122:                                           ; preds = %entry,
  %raised.0 = phi i8 [ 0, %entry ], [ 1, %else_122 ], !dbg !462
  call void @llvm.dbg.value(metadata i8 %raised.0, metadata !460, metadata
!DIExpression()), !dbg !462
  call void @Test__checkM(i8 %raised.0, i8* bitcast (i8** getelementptr
inbounds (%M_Const_struct, %M_Const_struct* @M_Const, i64 0, i32 39) to
i8*)), !dbg !469


and the relevant bit of assembly in X86_64

        leaq    M_Const(%rip), %rdi
        leaq    M_Main(%rip), %rdx
        movl    $449, %ecx              # imm = 0x1C1
        xorl    %esi, %esi
        callq   RTHooks__Raise at PLT
        movb    $1, %al                    <--------Note setting raised to
true after longjmp
.LBB47_3:                               # %if_122
        #DEBUG_VALUE: P25:finally <- 0
        #DEBUG_VALUE: P25:raised <- $al
        .loc    1 457 0                 # Main.m3:457:0
        movzbl  %al, %edi
        leaq    M_Const+280(%rip), %rsi
        callq   Test__checkM at PLT

  The raised alloca has been optimised away. The store to raised of 1
before the RTHooks__Raise has been eliminated which means it remains false
until the phi after the Raise.
I guess this is because the optimiser cannot know that RTHooks__Raise
eventually calls longjmp.

After a bit of digging, I found that volatile on loads and stores could fix
the problem.
Setting it on loads alone doesn't help but setting all stores in the
procedure to volatile
does the trick.

I'm wondering if this is the right stick to use and whether it's too big. A
try-except
block might be a small percentage of a function and I might be losing
optimisations in the rest of it.

Thanks Peter
