[PATCH] D132073: [CodeGen] Zero out only modified registers
Bill Wendling via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 1 15:35:54 PDT 2022
void added a comment.
In D132073#3765433 <https://reviews.llvm.org/D132073#3765433>, @efriedma wrote:
> In D132073#3765379 <https://reviews.llvm.org/D132073#3765379>, @void wrote:
>
>> In D132073#3765159 <https://reviews.llvm.org/D132073#3765159>, @efriedma wrote:
>>
>>>> we shouldn't be zeroing %rdi
>>>
>>> I don't see how it's a problem if we zero rdi. Even if we don't modify rdi, the caller doesn't have any way of knowing it wasn't modified. The calling convention says it's caller-save.
>>
>> This is just a test case. It's not meant to be definitive of the original bug. Just show the same symptoms.
>
> The thing is, I don't see the connection between this "symptom" and any possible bug. rdi is callee-clobbered, and the caller has no way of knowing whether the implementation actually writes to rdi, therefore it can't be a bug to add a write to rdi.
I understand. Here's an example of what's going on, at least with the Linux kernel. A function is defined like this:
u64 __attribute__((no_instrument_function)) _paravirt_ident_64(u64 x)
{
return x;
}
/* ... */
struct paravirt_patch_template pv_ops = {
/* ... */
.mmu.pmd_val = ((struct paravirt_callee_save) { _paravirt_ident_64 }),
/* ... */
};
The generated code is like this:
.text
.globl _paravirt_ident_64 # -- Begin function _paravirt_ident_64
.p2align 4, 0x90
.type _paravirt_ident_64, at function
_paravirt_ident_64: # @_paravirt_ident_64
.Lfunc_begin2:
.loc 0 85 0 # arch/x86/kernel/paravirt.c:85:0
.cfi_startproc
# %bb.0:
#DEBUG_VALUE: _paravirt_ident_64:x <- $rdi
movq %rdi, %rax
.Ltmp22:
.loc 0 86 2 prologue_end # arch/x86/kernel/paravirt.c:86:2
xorl %edi, %edi
.Ltmp23:
#DEBUG_VALUE: _paravirt_ident_64:x <- $rax
jmp __x86_return_thunk # TAILCALL
.Ltmp24:
.Lfunc_end2:
.size _paravirt_ident_64, .Lfunc_end2-_paravirt_ident_64
.cfi_endproc
As you mentioned, this *should* be okay, as `%rdi` is caller-saved. However, when it's used in a call, like this:
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__))
__attribute__((no_instrument_function)) pmdval_t
native_pmd_val(pmd_t pmd)
{
return pmd.pmd;
}
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__))
__attribute__((no_instrument_function)) int
pmd_none(pmd_t pmd)
{
unsigned long val = native_pmd_val(pmd);
return (val & ~((((pteval_t)(1)) << 6) | (((pteval_t)(1)) << 5))) == 0;
}
int pmd_huge(pmd_t pmd)
{
return !pmd_none(pmd) && (pmd_val(pmd) & ((((pteval_t)(1)) << 0) |
(((pteval_t)(1)) << 7))) !=
(((pteval_t)(1)) << 0);
}
The code generated is something like this:
.globl pmd_huge # -- Begin function pmd_huge
.p2align 4, 0x90
.type pmd_huge, at function
pmd_huge: # @pmd_huge
.Lfunc_begin0:
.loc 0 28 0 # arch/x86/mm/hugetlbpage.c:28:0
.cfi_sections .debug_frame
.cfi_startproc
# %bb.0:
callq __fentry__
.Ltmp1:
#DEBUG_VALUE: pmd_huge:pmd <- $rdi
#DEBUG_VALUE: pmd_none:pmd <- $rdi
#DEBUG_VALUE: pmd_none:val <- $rdi
.file 161 "./arch/x86/include/asm" "pgtable.h" md5 0x203fee1b33aab4131e5eb70cbeeb9a3d
.loc 161 793 41 prologue_end # ./arch/x86/include/asm/pgtable.h:793:41
testq $-97, %rdi
.Ltmp2:
.loc 0 29 24 # arch/x86/mm/hugetlbpage.c:29:24
je .LBB0_1
.Ltmp3:
# %bb.2:
#DEBUG_VALUE: pmd_huge:pmd <- $rdi
#DEBUG_VALUE: pmd_val:pmd <- $rdi
#DEBUG_VALUE: __edi <- undef
#DEBUG_VALUE: __esi <- undef
#DEBUG_VALUE: __edx <- undef
#DEBUG_VALUE: __ecx <- undef
#DEBUG_VALUE: __eax <- undef
.file 162 "./arch/x86/include/asm" "paravirt.h" md5 0x1f771689a20be93009d7ef8574461232
.loc 162 458 9 # ./arch/x86/include/asm/paravirt.h:458:9
#APP
# ALT: oldnstr
.Ltmp4:
.Ltmp5:
.Ltmp6:
.section .discard.retpoline_safe,"", at progbits
.quad .Ltmp6
.text
callq *pv_ops+536(%rip)
.Ltmp7:
.section .parainstructions,"a", at progbits
.p2align 3, 0x0
.quad .Ltmp5
.byte 67
.byte .Ltmp7-.Ltmp5
.short 1
.text
.Ltmp8:
# ALT: padding
.zero (-(((.Ltmp9-.Ltmp10)-(.Ltmp8-.Ltmp4))>0))*((.Ltmp9-.Ltmp10)-(.Ltmp8-.Ltmp4)),144
.Ltmp11:
.section .altinstructions,"a", at progbits
.Ltmp12:
.long .Ltmp4-.Ltmp12
.Ltmp13:
.long .Ltmp10-.Ltmp13
.short 33040
.byte .Ltmp11-.Ltmp4
.byte .Ltmp9-.Ltmp10
.text
.section .altinstr_replacement,"ax", at progbits
# ALT: replacement 1
.Ltmp10:
movq %rdi, %rax
.Ltmp9:
.text
#NO_APP
movq %rax, %rcx
.Ltmp14:
#DEBUG_VALUE: __eax <- $rax
.loc 0 30 17 # arch/x86/mm/hugetlbpage.c:30:17
andl $129, %ecx
.loc 0 30 46 is_stmt 0 # arch/x86/mm/hugetlbpage.c:30:46
xorl %eax, %eax
.Ltmp15:
cmpl $1, %ecx
setne %al
jmp .LBB0_3
.Ltmp16:
.LBB0_1:
#DEBUG_VALUE: pmd_huge:pmd <- $rdi
#DEBUG_VALUE: pmd_none:pmd <- $rdi
#DEBUG_VALUE: pmd_none:val <- $rdi
.loc 0 0 46 # arch/x86/mm/hugetlbpage.c:0:46
xorl %eax, %eax
.Ltmp17:
.LBB0_3:
#DEBUG_VALUE: pmd_huge:pmd <- $rdi
.loc 0 29 2 is_stmt 1 # arch/x86/mm/hugetlbpage.c:29:2
xorl %ecx, %ecx
xorl %edi, %edi
.Ltmp18:
#DEBUG_VALUE: pmd_huge:pmd <- [DW_OP_LLVM_entry_value 1] $rdi
jmp __x86_return_thunk # TAILCALL
.Ltmp19:
.Lfunc_end0:
.size pmd_huge, .Lfunc_end0-pmd_huge
.cfi_endproc
# -- End function
The important bit is the `movq %rdi, %rax` right after the `callq *pv_ops+536(%rip)` instruction. The `rdi` register isn't preserved across the call, and is in fact zeroed out so the return value, which was already placed in `rax` by `pmd_val`, is overwritten by zero. My thinking was that we don't want to zero out registers that aren't modified in the called function, because they're not expected to be stomped on. Perhaps there's something else going on?
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D132073/new/
https://reviews.llvm.org/D132073
More information about the llvm-commits
mailing list