[LLVMbugs] [Bug 21273] New: inline asm incorrectly handles output operands sometimes
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Tue Oct 14 06:39:07 PDT 2014
http://llvm.org/bugs/show_bug.cgi?id=21273
Bug ID: 21273
Summary: inline asm incorrectly handles output operands
sometimes
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: dimitry at andric.com
CC: llvmbugs at cs.uiuc.edu
Classification: Unclassified
Depending on the optimization level, clang trunk r219624 seems to sometimes
handle output operands incorrectly. This was reported to me by FreeBSD kernel
developers, who attempted to compile the following:
/*
 * Try to read one hardware random value with RDRAND and store it in *buf.
 *
 * The asm loops until RDRAND reports success (CF set) or the retry counter,
 * which starts at 10, reaches zero.
 *
 * Returns the remaining retry count: non-zero if a value was stored to *buf,
 * 0 if every attempt failed (the store to *buf is skipped on that path).
 *
 * NOTE(review): tmp (%2) is written by rdrand before the final mov uses the
 * address of *buf (%1), yet its constraint has no '&' (earlyclobber) modifier.
 * Per the GCC constraint-modifier documentation, an output without '&' may be
 * allocated to a register that is also read as part of a memory operand's
 * address. Presumably "=&q" is what is intended here -- confirm.
 */
int ivy_rng_store(long *buf)
{
long tmp;
int retry;
retry = 10;
__asm __volatile(
"1:\n\t"
"rdrand %2\n\t" /* read randomness into tmp */
"jb 2f\n\t" /* CF is set on success, exit retry loop */
"dec %0\n\t" /* otherwise, retry-- */
"jne 1b\n\t" /* and loop if retries are not exhausted */
"jmp 3f\n" /* failure, retry is 0, used as return value */
"2:\n\t"
"mov %2,%1\n\t" /* *buf = tmp */
"3:"
/*
 * Operands: %0 = retry (read-write, register), %1 = *buf (write-only,
 * memory), %2 = tmp (write-only, register). Condition codes are clobbered.
 */
: "+q" (retry), "=m" (*buf), "=q" (tmp) : : "cc");
return (retry);
}
I.e., the intent is that 'tmp' is used only as a scratch output: its value is
not used outside the inline asm, but is stored to *buf instead.
However, clang -O0 seems to have trouble keeping the two apart, as the
resulting assembly is:
# Compiler output for ivy_rng_store at -O0 on x86-64 (AT&T syntax).
# Inside the #APP/#NO_APP region the asm operands were substituted as:
#   %0 (retry) -> %eax, %1 (*buf) -> (%rdi), %2 (tmp) -> %rdi
# so tmp and the address of *buf share %rdi.
ivy_rng_store: # @ivy_rng_store
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp0:
.cfi_def_cfa_offset 16
.Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)          # spill the incoming buf pointer
movl $10, -20(%rbp)          # retry = 10
movl -20(%rbp), %eax         # %eax = retry (operand %0)
movq -8(%rbp), %rdi          # %rdi = buf, used as the address of operand %1
#APP
.Ltmp3:
rdrandq %rdi                 # operand %2 also landed in %rdi: overwrites buf
jb .Ltmp4
decl %eax
jne .Ltmp3
jmp .Ltmp5
.Ltmp4:
movq %rdi, (%rdi)            # stores the random value at the address it just overwrote
.Ltmp5:
#NO_APP
movl %eax, -20(%rbp)         # write retry back to its stack slot
movq %rdi, -16(%rbp)         # write operand %2 (tmp) back to its stack slot
movl -20(%rbp), %eax         # return value = retry
popq %rbp
retq
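Clearly, the movq %rdi, (%rdi) is incorrect: 'tmp' (%2) and the address of
*buf (%1) were both assigned to %rdi, so rdrand overwrites the pointer before
the store. This seems to be magically solved by enabling optimization, e.g. at
-O1 or higher: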
# Compiler output for ivy_rng_store at -O1 or higher on x86-64 (AT&T syntax).
# Inside the #APP/#NO_APP region the asm operands were substituted as:
#   %0 (retry) -> %eax, %1 (*buf) -> (%rdi), %2 (tmp) -> %rcx
# so tmp and the address of *buf use different registers here.
ivy_rng_store: # @ivy_rng_store
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp0:
.cfi_def_cfa_offset 16
.Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp2:
.cfi_def_cfa_register %rbp
movl $10, %eax               # retry = 10, kept in %eax (operand %0)
#APP
.Ltmp3:
rdrandq %rcx                 # random value goes to %rcx (operand %2)
jb .Ltmp4
decl %eax
jne .Ltmp3
jmp .Ltmp5
.Ltmp4:
movq %rcx, (%rdi)            # *buf = tmp; %rdi still holds the buf pointer
.Ltmp5:
#NO_APP
popq %rbp
retq                         # %eax (retry) is the return value
Something similar happens when targeting i386 at -O0:
# Compiler output for ivy_rng_store at -O0 on i386 (AT&T syntax).
# Inside the #APP/#NO_APP region the asm operands were substituted as:
#   %0 (retry) -> %eax, %1 (*buf) -> (%ecx), %2 (tmp) -> %ecx
# so tmp and the address of *buf share %ecx.
ivy_rng_store: # @ivy_rng_store
# BB#0: # %entry
pushl %ebp
movl %esp, %ebp
subl $12, %esp               # reserve 12 bytes of stack for locals
movl 8(%ebp), %eax           # load the stack-passed argument (buf)
movl %eax, -4(%ebp)          # spill buf to a local slot
movl $10, -12(%ebp)          # retry = 10
movl -12(%ebp), %eax         # %eax = retry (operand %0)
movl -4(%ebp), %ecx          # %ecx = buf, used as the address of operand %1
#APP
.Ltmp0:
rdrandl %ecx                 # operand %2 also landed in %ecx: overwrites buf
jb .Ltmp1
decl %eax
jne .Ltmp0
jmp .Ltmp2
.Ltmp1:
movl %ecx, (%ecx)            # stores the random value at the address it just overwrote
.Ltmp2:
#NO_APP
movl %eax, -12(%ebp)         # write retry back to its stack slot
movl %ecx, -8(%ebp)          # write operand %2 (tmp) back to its stack slot
movl -12(%ebp), %eax         # return value = retry
addl $12, %esp
popl %ebp
retl
However, on i386 optimization does not fix it, e.g. at -O1 or higher:
# Compiler output for ivy_rng_store at -O1 or higher on i386 (AT&T syntax).
# Inside the #APP/#NO_APP region the asm operands were substituted as:
#   %0 (retry) -> %eax, %1 (*buf) -> (%ecx), %2 (tmp) -> %ecx
# so tmp and the address of *buf still share %ecx with optimization enabled.
ivy_rng_store: # @ivy_rng_store
# BB#0: # %entry
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %ecx           # %ecx = stack-passed argument (buf), address of operand %1
movl $10, %eax               # retry = 10, kept in %eax (operand %0)
#APP
.Ltmp0:
rdrandl %ecx                 # operand %2 also landed in %ecx: overwrites buf
jb .Ltmp1
decl %eax
jne .Ltmp0
jmp .Ltmp2
.Ltmp1:
movl %ecx, (%ecx)            # stores the random value at the address it just overwrote
.Ltmp2:
#NO_APP
popl %ebp
retl                         # %eax (retry) is the return value
Changing the output constraint on 'tmp' to "+q" seems to help on amd64, but on
i386 it still produces incorrect output at -O1 or higher optimization.
I tested the above code with different versions of gcc (4.7 through 5.0), but
the resulting assembly was always as expected, at any optimization level.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20141014/fad7d4eb/attachment.html>
More information about the llvm-bugs
mailing list