[llvm-bugs] [Bug 31797] New: Poor code generation depending on constexprness of function

via llvm-bugs llvm-bugs at lists.llvm.org
Sun Jan 29 17:59:54 PST 2017


https://llvm.org/bugs/show_bug.cgi?id=31797

            Bug ID: 31797
           Summary: Poor code generation depending on constexprness of
                    function
           Product: clang
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: C++
          Assignee: unassignedclangbugs at nondot.org
          Reporter: ldionne.2 at gmail.com
                CC: dgregor at apple.com, llvm-bugs at lists.llvm.org
    Classification: Unclassified

Hi,

The following snippet results in very different codegen depending on whether
the `make_pair` function is marked as `constexpr` or not:

  #include <utility>

  // Builds a std::pair<F, S> from copies of `f` and `s`.
  // Whether this function is constexpr is the variable under test in this
  // report: it is left non-constexpr here, and the toggle is the line below.
  template <typename F, typename S>
  // constexpr // <--- TRY UNCOMMENTING THIS, THE DYNAMIC CALL DISAPPEARS
  std::pair<F, S> make_pair(F f, S s) { return std::pair<F, S>{f, s}; }

  // Minimal wrapper that stores a single Pair by value.
  template <typename Pair>
  struct map {
    // explicit: a Pair does not implicitly convert to a map<Pair>.
    explicit constexpr map(Pair p) : pair_(p) { }
    Pair pair_;  // the wrapped pair; read directly by main() below
  };

  // constexpr factory: wraps `pair` in a map<Pair>, deducing Pair from the
  // argument. Always constexpr, unlike make_pair above.
  template <typename Pair>
  constexpr map<Pair> make_map(Pair pair)
  { return map<Pair>{pair}; }

  // Empty tag type used as the first element of the pair held by VTable.
  struct increment_tag { };
  // A map wrapping one (increment_tag, function pointer) pair; the function
  // pointer takes the object as a void*.
  using VTable = map<std::pair<increment_tag, void (*)(void*)>>;

  // Treats `self` as a T* and pre-increments the T it points to.
  template <typename T>
  void increment(void* self) { ++*static_cast<T*>(self); }

  // One VTable per T, whose pair holds increment_tag{} and &increment<T>.
  // The initializer goes through make_pair, so its form depends on whether
  // make_pair is constexpr; in the non-constexpr listing in this report the
  // function pointer is stored by __cxx_global_var_init.
  template <typename T>
  static VTable const vtable = make_map(make_pair(increment_tag{},
&increment<T>));

  // Holds an iterator of any type It behind a void*, together with a pointer
  // to the VTable instantiated for It.
  struct any_iterator {
    // Points vptr_ at vtable<It> and stores a heap-allocated copy of `it`.
    // The snippet never deletes that copy (no destructor is declared).
    template <typename It>
    explicit any_iterator(It it) : vptr_{&vtable<It>}, self_{new It(it)} { }
    VTable const* vptr_;  // table providing the increment function for It
    void* self_;          // the copy of the iterator, with its type erased
  };

  // Wraps a pointer to the first array element in an any_iterator and
  // increments it once through the function pointer stored in the vtable.
  int main() {
    int array[3] = {0};
    any_iterator it{&array[0]};
    // It is int*, so this calls increment<int*> on the heap copy of the
    // pointer; the array contents are not modified.
    it.vptr_->pair_.second(it.self_);
  }

Without the `constexpr` qualifier, the generated code (-O3) is this:

  main:                                   # @main
          sub     rsp, 24
          mov     dword ptr [rsp + 16], 0
          mov     qword ptr [rsp + 8], 0
          mov     edi, 8
          call    operator new(unsigned long)
          lea     rcx, [rsp + 8]
          mov     qword ptr [rax], rcx
          mov     rdi, rax
          call    qword ptr [rip + vtable<int*>+8]
          xor     eax, eax
          add     rsp, 24
          ret

  __cxx_global_var_init:                  # @__cxx_global_var_init
          mov     qword ptr [rip + vtable<int*>+8], void increment<int*>(void*)
          ret

  void increment<int*>(void*):                    # @void increment<int*>(void*)
          add     qword ptr [rdi], 4
          ret


With the `constexpr` qualifier, the optimizer seems to see through the
initialization of the vtable, and it is able to collapse everything:

    main:                                   # @main
        xor     eax, eax
        ret


Same example on Godbolt: https://godbolt.org/g/dSzAe2

More involved example on Wandbox, where the codegen difference makes a
significant performance difference:
http://melpon.org/wandbox/permlink/06SYXAs9q5D19sxx

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170130/bb67f563/attachment-0001.html>


More information about the llvm-bugs mailing list