[llvm-dev] A volatile question.
Peter McKinna via llvm-dev
llvm-dev at lists.llvm.org
Thu Nov 21 20:58:26 PST 2019
Hi,
I have a small procedure that works without optimisations but fails
after being run through opt -O3. It is written in Modula-3 and tests the
language's exception handling. M3 implements exceptions with traditional
setjmp/longjmp.
In the example, both raised and finally should be true when the handler runs.
(*-------------------- Test RAISE in FINALLY with another RAISE on stack.
---*)
(* P25: raises E inside a TRY-FINALLY whose FINALLY part raises E again,
   then checks in the enclosing EXCEPT handler that the assignments made
   just before each RAISE are visible.  Both `raised` and `finally` are
   expected to be TRUE at the two checkM calls. *)
PROCEDURE P25 () =
VAR raised := FALSE; finally := FALSE;
BEGIN
TRY
TRY
(* Assigned immediately before the first RAISE; checked in the handler. *)
raised := TRUE;
RAISE E;
FINALLY
(* Assigned in the FINALLY part, which then raises E a second time. *)
finally := TRUE;
RAISE E;
END;
(* Follows a TRY-FINALLY in which both parts end with RAISE E; presumably
   Test.check (FALSE) reports a failure if control ever gets here --
   TODO confirm against the Test module. *)
Test.check (FALSE); <*NOWARN*>
EXCEPT E =>
(* Handler for E: verify that both flags were set before control got here. *)
Test.checkM(raised," P25 raised");
Test.checkM(finally," P25 finally");
END;
END P25;
Here's the generated IR. The finally block gets turned into a nested
procedure, which I haven't shown; that is why the function sets up a
display. setjmp is declared with the returns_twice attribute, but that
doesn't seem to make any difference.
define void @Main__P25() #0 !dbg !303 {
entry:
%raised = alloca i8, align 1
%finally = alloca i8, align 1
%tmp.181 = alloca i8*, align 8
%tmp.182 = alloca i8*, align 8
%tmp.183 = alloca i64, align 8
%tmp.184 = alloca %struct.2, align 8
%tmp.185 = alloca %struct.3, align 8
%__Display = alloca [1 x i8*]
%__Display.i8pp = bitcast [1 x i8*]* %__Display to i8**
%__NewDisplaySlot.i8pp = getelementptr inbounds i8*, i8**
%__Display.i8pp, i64 0
store i8* %finally, i8** %__NewDisplaySlot.i8pp
br label %second, !dbg !304
second: ; preds = %entry
call void @llvm.dbg.declare(metadata i8* %raised, metadata !305, metadata
!DIExpression()), !dbg !307
call void @llvm.dbg.declare(metadata i8* %finally, metadata !308,
metadata !DIExpression()), !dbg !307
store i8 0, i8* %finally, align 1, !dbg !309
store i8 0, i8* %raised, align 1, !dbg !309
store i8 0, i8* %finally, align 1, !dbg !309
%v.58 = load i64, i64* @m3_jmpbuf_size, align 8, !dbg !309
store i64 %v.58, i64* %tmp.183, align 8, !dbg !309
%v.183 = load i64, i64* %tmp.183, align 8, !dbg !309
%umul = mul nuw i64 2, %v.183, !dbg !309
%jmpbuf_size = alloca i8, i64 %umul, !dbg !309
store i8* %jmpbuf_size, i8** %tmp.181, align 8, !dbg !309
%v.181 = load i8*, i8** %tmp.181, align 8, !dbg !309
%loophole-addr_word = ptrtoint i8* %v.181 to i64, !dbg !309
%v.1831 = load i64, i64* %tmp.183, align 8, !dbg !309
%uadd = add nuw i64 %loophole-addr_word, %v.1831, !dbg !309
%loophole-addr_word2 = inttoptr i64 %uadd to i8*, !dbg !309
store i8* %loophole-addr_word2, i8** %tmp.182, align 8, !dbg !309
br label %label_121, !dbg !310
label_121: ; preds = %second
%store_base.i8p = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
%store_dest.i8p = getelementptr inbounds i8, i8* %store_base.i8p, i64 16,
!dbg !310
%store_dest = bitcast i8* %store_dest.i8p to i8**, !dbg !310
store i8* getelementptr inbounds (i8, i8* bitcast (%M_Const_struct*
@M_Const to i8*), i64 136), i8** %store_dest, align 8, !dbg !310
%store_base.i8p3 = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
%store_dest.i8p4 = getelementptr inbounds i8, i8* %store_base.i8p3, i64
8, !dbg !310
%store_dest5 = bitcast i8* %store_dest.i8p4 to i64*, !dbg !310
store i64 0, i64* %store_dest5, align 8, !dbg !310
%pop_toadr = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
call void @RTHooks__PushEFrame(i8* %pop_toadr), !dbg !310
%v.1816 = load i8*, i8** %tmp.181, align 8, !dbg !310
%store_base.i8p7 = bitcast %struct.2* %tmp.184 to i8*, !dbg !310
%store_dest.i8p8 = getelementptr inbounds i8, i8* %store_base.i8p7, i64
96, !dbg !310
%store_dest9 = bitcast i8* %store_dest.i8p8 to i8**, !dbg !310
store i8* %v.1816, i8** %store_dest9, align 8, !dbg !310
%v.18110 = load i8*, i8** %tmp.181, align 8, !dbg !310
%result = call i64 @_setjmp(i8* %v.18110), !dbg !310
%icmp = icmp ne i64 %result, 0, !dbg !310
br i1 %icmp, label %if_122, label %else_122, !dbg !310
else_122: ; preds = %label_121
%store_base.i8p11 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
%store_dest.i8p12 = getelementptr inbounds i8, i8* %store_base.i8p11, i64
16, !dbg !311
%store_dest13 = bitcast i8* %store_dest.i8p12 to i8**, !dbg !311
store i8* bitcast (void (i8*)* @Main_M3_LINE_451 to i8*), i8**
%store_dest13, align 8, !dbg !311
%__static_link_from_display = bitcast i8** %__Display.i8pp to i8*, !dbg
!311
%store_base.i8p14 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
%store_dest.i8p15 = getelementptr inbounds i8, i8* %store_base.i8p14, i64
24, !dbg !311
%store_dest16 = bitcast i8* %store_dest.i8p15 to i8**, !dbg !311
store i8* %__static_link_from_display, i8** %store_dest16, align 8, !dbg
!311
br label %label_123, !dbg !311
label_123: ; preds = %else_122
%store_base.i8p17 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
%store_dest.i8p18 = getelementptr inbounds i8, i8* %store_base.i8p17, i64
8, !dbg !311
%store_dest19 = bitcast i8* %store_dest.i8p18 to i64*, !dbg !311
store i64 3, i64* %store_dest19, align 8, !dbg !311
%pop_toadr20 = bitcast %struct.3* %tmp.185 to i8*, !dbg !311
call void @RTHooks__PushEFrame(i8* %pop_toadr20), !dbg !311
store i8 1, i8* %raised, align 1, !dbg !312
call void @RTHooks__Raise(i8* bitcast (%M_Const_struct* @M_Const to i8*),
i8* null, i8* bitcast (%M_Main_struct* @M_Main to i8*), i64 449), !dbg !313
br label %label_124, !dbg !313
label_124: ; preds = %label_123
br label %if_122, !dbg !314
if_122: ; preds = %label_124,
%label_121
%v.27 = load i8, i8* %raised, align 1, !dbg !315
%zext = zext i8 %v.27 to i64, !dbg !315
%pop_trunc = trunc i64 %zext to i8, !dbg !315
call void @Test__checkM(i8 %pop_trunc, i8* getelementptr inbounds (i8,
i8* bitcast (%M_Const_struct* @M_Const to i8*), i64 280)), !dbg !315
Here is a portion of the optimised IR.
...
%result = call i64 @_setjmp(i8* nonnull %jmpbuf_size), !dbg !465
%icmp = icmp eq i64 %result, 0, !dbg !465
br i1 %icmp, label %else_122, label %if_122, !dbg !465
else_122: ; preds = %entry
%tmp.1853.sub = getelementptr inbounds [40 x i8], [40 x i8]* %tmp.1853,
i64 0, i64 0
%store_dest.i8p12 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 16, !dbg !466
%store_dest13 = bitcast i8* %store_dest.i8p12 to i8**, !dbg !466
store i8* bitcast (void (i8*)* @Main_M3_LINE_451 to i8*), i8**
%store_dest13, align 8, !dbg !466
%store_dest.i8p15 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 24, !dbg !466
%0 = bitcast i8* %store_dest.i8p15 to i8***, !dbg !466
store i8** %__Display, i8*** %0, align 8, !dbg !466
%store_dest.i8p18 = getelementptr inbounds [40 x i8], [40 x i8]*
%tmp.1853, i64 0, i64 8, !dbg !466
%store_dest19 = bitcast i8* %store_dest.i8p18 to i64*, !dbg !466
store i64 3, i64* %store_dest19, align 8, !dbg !466
call void @RTHooks__PushEFrame(i8* nonnull %tmp.1853.sub), !dbg !466
call void @llvm.dbg.value(metadata i8 1, metadata !460, metadata
!DIExpression()), !dbg !462
call void @RTHooks__Raise(i8* bitcast (%M_Const_struct* @M_Const to i8*),
i8* null, i8* bitcast (%M_Main_struct* @M_Main to i8*), i64 449), !dbg !467
br label %if_122, !dbg !468
if_122: ; preds = %entry,
%else_122
%raised.0 = phi i8 [ 0, %entry ], [ 1, %else_122 ], !dbg !462
call void @llvm.dbg.value(metadata i8 %raised.0, metadata !460, metadata
!DIExpression()), !dbg !462
call void @Test__checkM(i8 %raised.0, i8* bitcast (i8** getelementptr
inbounds (%M_Const_struct, %M_Const_struct* @M_Const, i64 0, i32 39) to
i8*)), !dbg !469
...
and the relevant bit of assembly in X86_64
leaq M_Const(%rip), %rdi
leaq M_Main(%rip), %rdx
movl $449, %ecx # imm = 0x1C1
xorl %esi, %esi
callq RTHooks__Raise@PLT
movb $1, %al <-------- Note: raised is only set to true here, after the call to RTHooks__Raise (which longjmps)
.Ltmp272:
.LBB47_3: # %if_122
#DEBUG_VALUE: P25:finally <- 0
#DEBUG_VALUE: P25:raised <- $al
.loc 1 457 0 # Main.m3:457:0
movzbl %al, %edi
leaq M_Const+280(%rip), %rsi
callq Test__checkM@PLT
The raised alloca has been optimised away. The store of 1 to raised
before the call to RTHooks__Raise has been eliminated, which means raised
remains false until the phi after the Raise.
I guess this is because the optimiser cannot know that RTHooks__Raise
eventually calls longjmp.
After a bit of digging, I found that marking loads and stores volatile
could fix the problem.
Setting it on loads alone doesn't help, but making all stores in the
procedure volatile does the trick.
I'm wondering whether this is the right stick to use, or whether it's too
big: a try-except block might be a small percentage of a function, and I
might be losing optimisations elsewhere.
Thanks Peter
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20191122/5707e925/attachment.html>
More information about the llvm-dev
mailing list