[llvm-bugs] [Bug 32150] New: Inline function not properly optimized

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Mar 6 07:19:13 PST 2017


http://bugs.llvm.org/show_bug.cgi?id=32150

            Bug ID: 32150
           Summary: Inline function not properly optimized
           Product: clang
           Version: 4.0
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: -New Bugs
          Assignee: unassignedclangbugs at nondot.org
          Reporter: royger at freebsd.org
                CC: llvm-bugs at lists.llvm.org

Hello,

After updating clang to 4.0 I'm no longer able to build Xen (note that clang
3.5-3.9 build Xen without problems). This is because clang no longer optimises
away certain dead calls inside always-inline functions, which results in a
link-time failure. The relevant parts of the code are as follows:

enum xsm_default {
    XSM_HOOK,     /* Guests can normally access the hypercall */
    XSM_DM_PRIV,  /* Device model can perform on its target domain */
    XSM_TARGET,   /* Can perform on self or your target domain */
    XSM_PRIV,     /* Privileged - normally restricted to dom0 */
    XSM_XS_PRIV,  /* Xenstore domain - can do some privileged operations */
    XSM_OTHER     /* Something more complex */
};
typedef enum xsm_default xsm_default_t;

#define LINKER_BUG_ON(x) do { if (x) __xsm_action_mismatch_detected(); } while (0)
/* DO NOT implement this function; it is supposed to trigger link errors */
void __xsm_action_mismatch_detected(void);
#define XSM_ASSERT_ACTION(def) LINKER_BUG_ON(def != action)

static __inline__ __attribute__ ((__always_inline__)) int xsm_default_action(
    xsm_default_t action, struct domain *src, struct domain *target)
{
    switch ( action ) {
    case XSM_HOOK:
        return 0;
    case XSM_TARGET:
        if ( src == target )
        {
            return 0;
    case XSM_XS_PRIV:
            if ( src->is_xenstore )
                return 0;
        }
        /* fall through */
    case XSM_DM_PRIV:
        if ( target && src->target == target )
            return 0;
        /* fall through */
    case XSM_PRIV:
        if ( src->is_privileged )
            return 0;
        return -EPERM;
    default:
        LINKER_BUG_ON(1);
        return -EPERM;
    }
}

static __inline__ __attribute__ ((__always_inline__)) int xsm_pmu_op(
    xsm_default_t action, struct domain *d, unsigned int op)
{
    XSM_ASSERT_ACTION(XSM_OTHER);
    switch ( op )
    {
    case XENPMU_mode_set:
    case XENPMU_mode_get:
    case XENPMU_feature_set:
    case XENPMU_feature_get:
        return xsm_default_action(XSM_PRIV, d, current->domain);
    case XENPMU_init:
    case XENPMU_finish:
    case XENPMU_lvtpc_set:
    case XENPMU_flush:
        return xsm_default_action(XSM_HOOK, d, current->domain);
    default:
        return -EPERM;
    }
}

long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
    int ret;
    struct vcpu *curr;
    struct xen_pmu_params pmu_params = {.val = 0};
    struct xen_pmu_data *xenpmu_data;
    struct vpmu_struct *vpmu;

    if ( !opt_vpmu_enabled || has_vlapic(current->domain) )
        return -EOPNOTSUPP;

    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
    if ( ret )
        return ret;

    [...]
}
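
As an aside, the idiom in isolation looks like this; a minimal sketch of my
own (a hypothetical reduction, not taken from the Xen sources) that should
only link when the compiler folds the guarded call away:

/* Minimal sketch of the link-time assertion idiom (hypothetical reduction). */
void __xsm_action_mismatch_detected(void); /* deliberately never defined */
#define LINKER_BUG_ON(x) do { if (x) __xsm_action_mismatch_detected(); } while (0)

static __inline__ __attribute__ ((__always_inline__)) int check(int def, int action)
{
    LINKER_BUG_ON(def != action); /* dead when both arguments are constants */
    return 0;
}

int main(void)
{
    return check(1, 1); /* links only if the dead branch is removed */
}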

The Xen code relies on the calls to __xsm_action_mismatch_detected being
optimized away because they are dead code, yet in this case clang 4.0
generates the following assembly for do_xenpmu_op:

<do_xenpmu_op>:
     bd0:       55                      push   %rbp
     bd1:       48 89 e5                mov    %rsp,%rbp
     bd4:       41 57                   push   %r15
     bd6:       41 56                   push   %r14
     bd8:       41 55                   push   %r13
     bda:       41 54                   push   %r12
     bdc:       53                      push   %rbx
     bdd:       48 83 ec 28             sub    $0x28,%rsp
     be1:       48 89 f3                mov    %rsi,%rbx
     be4:       41 89 fd                mov    %edi,%r13d
     be7:       48 c7 45 d0 00 00 00    movq   $0x0,-0x30(%rbp)
     bee:       00
     bef:       48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
     bf6:       00
     bf7:       48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
     bfe:       00
     bff:       49 c7 c4 a1 ff ff ff    mov    $0xffffffffffffffa1,%r12
     c06:       83 3d 00 00 00 00 00    cmpl   $0x0,0x0(%rip)        # c0d <do_xenpmu_op+0x3d>
                        c08: R_X86_64_PC32      .data.read_mostly+0x3
     c0d:       0f 84 f4 02 00 00       je     f07 <do_xenpmu_op+0x337>
     c13:       e8 58 f4 ff ff          callq  70 <get_cpu_info>
     c18:       48 8b 88 d0 00 00 00    mov    0xd0(%rax),%rcx
     c1f:       4c 8b 79 10             mov    0x10(%rcx),%r15
     c23:       41 f6 87 d4 09 00 00    testb  $0x1,0x9d4(%r15)
     c2a:       01
     c2b:       0f 85 d6 02 00 00       jne    f07 <do_xenpmu_op+0x337>
     c31:       49 c7 c4 ff ff ff ff    mov    $0xffffffffffffffff,%r12
     c38:       41 83 fd 07             cmp    $0x7,%r13d
     c3c:       0f 87 c5 02 00 00       ja     f07 <do_xenpmu_op+0x337>
     c42:       48 89 45 b8             mov    %rax,-0x48(%rbp)
     c46:       4d 63 f5                movslq %r13d,%r14
     c49:       e8 22 f4 ff ff          callq  70 <get_cpu_info>
     c4e:       48 8d 0d 00 00 00 00    lea    0x0(%rip),%rcx        # c55 <do_xenpmu_op+0x85>
                        c51: R_X86_64_PC32      .Lswitch.table+0xfffffffffffffffc
     c55:       42 8b 0c b1             mov    (%rcx,%r14,4),%ecx
     c59:       48 83 f9 04             cmp    $0x4,%rcx
     c5d:       77 29                   ja     c88 <do_xenpmu_op+0xb8>
     c5f:       48 8b 80 d0 00 00 00    mov    0xd0(%rax),%rax
     c66:       48 8b 40 10             mov    0x10(%rax),%rax
     c6a:       48 8d 15 00 00 00 00    lea    0x0(%rip),%rdx        # c71 <do_xenpmu_op+0xa1>
                        c6d: R_X86_64_PC32      .rodata+0xfffffffffffffffc
     c71:       48 63 0c 8a             movslq (%rdx,%rcx,4),%rcx
     c75:       48 01 d1                add    %rdx,%rcx
     c78:       49 89 de                mov    %rbx,%r14
     c7b:       48 8b 55 b8             mov    -0x48(%rbp),%rdx
     c7f:       ff e1                   jmpq   *%rcx
     c81:       4c 39 f8                cmp    %r15,%rax
     c84:       75 16                   jne    c9c <do_xenpmu_op+0xcc>
     c86:       eb 30                   jmp    cb8 <do_xenpmu_op+0xe8>
     c88:       e8 00 00 00 00          callq  c8d <do_xenpmu_op+0xbd>
                        c89: R_X86_64_PC32      __xsm_action_mismatch_detected+0xfffffffffffffffc
     c8d:       e9 75 02 00 00          jmpq   f07 <do_xenpmu_op+0x337>
     c92:       41 80 bf 9b 01 00 00    cmpb   $0x0,0x19b(%r15)
     c99:       00
     c9a:       75 1c                   jne    cb8 <do_xenpmu_op+0xe8>
     c9c:       48 85 c0                test   %rax,%rax
     c9f:       74 09                   je     caa <do_xenpmu_op+0xda>
[...]
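
For what it's worth, the .Lswitch.table relocation above makes me suspect
(speculation on my part, not a confirmed diagnosis) that clang lowered the
inlined switch into a data-table lookup, so the action chosen per op is
loaded at run time instead of being a per-case constant, and the default arm
of xsm_default_action can no longer be proven dead. Roughly, as hand-written
C (a sketch of that reading, with hypothetical names and an assumed op
ordering):

/* Speculative sketch: once op -> action goes through a lookup table,
 * `action` is a runtime value, so the inlined default arm containing
 * LINKER_BUG_ON(1) survives dead-code elimination. */
static const xsm_default_t op_to_action[8] = {
    XSM_PRIV, XSM_PRIV, XSM_PRIV, XSM_PRIV, /* mode/feature ops (order assumed) */
    XSM_HOOK, XSM_HOOK, XSM_HOOK, XSM_HOOK  /* init/finish/lvtpc_set/flush */
};

static int xsm_pmu_op_lowered(struct domain *d, unsigned int op)
{
    xsm_default_t action = op_to_action[op]; /* runtime load, not a constant */
    return xsm_default_action(action, d, current->domain);
}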

Either way, the surviving call makes the link fail later on, due to the
unresolved __xsm_action_mismatch_detected symbol. I can work around this by
changing xsm_pmu_op so that it becomes:

static __inline__ __attribute__ ((__always_inline__)) int xsm_pmu_op(
    xsm_default_t action, struct domain *d, unsigned int op)
{
    XSM_ASSERT_ACTION(XSM_OTHER);
    switch ( op )
    {
    case XENPMU_init:
    case XENPMU_finish:
    case XENPMU_lvtpc_set:
    case XENPMU_flush:
        return xsm_default_action(XSM_HOOK, d, current->domain);
    default:
        return xsm_default_action(XSM_PRIV, d, current->domain);
    }
}

But I would like to understand why this fails, and whether such issues can be
avoided in the future.

Thanks, Roger.
