<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c causes more stack usage on PowerPC"
   href="https://bugs.llvm.org/show_bug.cgi?id=49610">49610</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c causes more stack usage on PowerPC
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: PowerPC
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>natechancellor@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org, nemanja.i.ibm@gmail.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Forgive me if I am filing this in the wrong section or tagging the wrong
people; this is just based on my own investigation and triage.

After
<a href="https://github.com/llvm/llvm-project/commit/8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c">https://github.com/llvm/llvm-project/commit/8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c</a>,
stack usage ballooned in a particular function in the PowerPC KVM section of
the Linux kernel, which causes a build error because of -Werror.

Prior to 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c (with Linux at 5.12-rc3 for
<a href="https://git.kernel.org/linus/97e4910232fa1f81e806aa60c25a0450276d99a2">https://git.kernel.org/linus/97e4910232fa1f81e806aa60c25a0450276d99a2</a>):

# legacy pass manager
$ make -skj"$(nproc)" ARCH=powerpc CC=clang CROSS_COMPILE=powerpc64-linux-gnu-
KCFLAGS="-fno-experimental-new-pass-manager -Wframe-larger-than=1024"
O=build/ppc64 distclean pseries_defconfig disable-werror.config
arch/powerpc/kvm/book3s_hv_nested.o
arch/powerpc/kvm/book3s_hv_nested.c:264:6: warning: stack frame size of 1728
bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 warning generated.

# new pass manager
$ make -skj"$(nproc)" ARCH=powerpc CC=clang CROSS_COMPILE=powerpc64-linux-gnu-
KCFLAGS="-fexperimental-new-pass-manager -Wframe-larger-than=1024"
O=build/ppc64 distclean pseries_defconfig disable-werror.config
arch/powerpc/kvm/book3s_hv_nested.o
arch/powerpc/kvm/book3s_hv_nested.c:264:6: warning: stack frame size of 1712
bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 warning generated.

After 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c:

# legacy pass manager
$ make -skj"$(nproc)" ARCH=powerpc CC=clang CROSS_COMPILE=powerpc64-linux-gnu-
KCFLAGS="-fno-experimental-new-pass-manager -Wframe-larger-than=1024"
O=build/ppc64 distclean pseries_defconfig disable-werror.config
arch/powerpc/kvm/book3s_hv_nested.o
arch/powerpc/kvm/book3s_hv_nested.c:264:6: warning: stack frame size of 2480
bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 warning generated.

# new pass manager
$ make -skj"$(nproc)" ARCH=powerpc CC=clang CROSS_COMPILE=powerpc64-linux-gnu-
KCFLAGS="-fexperimental-new-pass-manager -Wframe-larger-than=1024"
O=build/ppc64 distclean pseries_defconfig disable-werror.config
arch/powerpc/kvm/book3s_hv_nested.o
arch/powerpc/kvm/book3s_hv_nested.c:264:6: warning: stack frame size of 2048
bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 warning generated.

For reference, GCC 10.2.0:

$ make -skj"$(nproc)" ARCH=powerpc ARCH=powerpc CROSS_COMPILE=powerpc64-linux-
KCFLAGS=-Wframe-larger-than=1024 O=build/ppc64 distclean pseries_defconfig
disable-werror.config arch/powerpc/kvm/book3s_hv_nested.o
arch/powerpc/kvm/book3s_hv_nested.c: In function 'kvmhv_enter_nested_guest':
arch/powerpc/kvm/book3s_hv_nested.c:387:1: warning: the frame size of 1280
bytes is larger than 1024 bytes [-Wframe-larger-than=]
  387 | }
      | ^

I tried to reduce this down with cvise and came up with:

struct hv_guest_state {
  int vcpu_token;
  long pcr;
  long amor;
  long dpdes;
  long hfscr;
  long tb_offset;
  long srr0;
  long srr1;
  long sprg[4];
  long pidr;
  long cfar;
  long ppr;
  long dawr1;
  long dawrx1;
} kvmhv_write_guest_state_and_regs_l2_hv;
struct pt_regs {
  struct {
    struct {
      long gpr[32];
      long nip;
      long msr;
      long orig_gpr3;
      long ctr;
      long link;
      long xer;
      long ccr;
      long softe;
      long trap;
      long dar;
      long dsisr;
      long result;
    };
    long __pad[4];
  };
} __srcu_read_unlock();
struct kvm_vcpu_arch {
  struct pt_regs regs;
  struct kvmppc_vcore *vcore;
  int trap;
};
struct kvmppc_vcore {
  long tb_offset;
  long pcr;
};
struct kvm_nested_guest {
  long l1_gr_to_hr;
};
int kvm_vcpu_read_guest();
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) {
  struct kvm_nested_guest __trans_tmp_14 = *gp;
  __srcu_read_unlock(__trans_tmp_14);
}
void byteswap_pt_regs(struct pt_regs *regs) {
  unsigned long *addr = (long *)regs;
  for (; addr < (unsigned long *)(regs + 1); addr++)
    *addr = __builtin_bswap64(*addr);
}
int kvmhv_read_guest_state_and_regs(struct hv_guest_state *l2_hv,
                                    struct pt_regs *l2_regs) {
  return kvm_vcpu_read_guest(l2_hv) || kvm_vcpu_read_guest(l2_regs);
}
long kvmhv_enter_nested_guest(struct kvm_vcpu_arch *vcpu) {
  struct kvm_nested_guest *l2;
  struct pt_regs l2_regs, saved_l1_regs;
  struct hv_guest_state l2_hv;
  struct kvmppc_vcore *vc = vcpu->vcore;
  long regs_ptr = kvmhv_read_guest_state_and_regs(&l2_hv, &l2_regs);
  byteswap_pt_regs(&l2_regs);
  if (!l2->l1_gr_to_hr)
    kvmhv_update_ptbl_cache(l2);
  saved_l1_regs = vcpu->regs;
  vc->tb_offset += l2_hv.tb_offset;
  vcpu->regs = saved_l1_regs;
  vc = vcpu->vcore;
  vc->pcr = 0;
  struct hv_guest_state *hr = 0;
  hr->pidr = hr->dawr1 = __builtin_bswap64(hr->dawr1);
  hr->dawrx1 = __builtin_bswap64(hr->dawrx1);
  byteswap_pt_regs(&l2_regs);
  kvm_vcpu_read_guest(kvmhv_write_guest_state_and_regs_l2_hv, regs_ptr);
  return vcpu->trap;
}

which produces the same result: <a href="https://godbolt.org/z/754Men">https://godbolt.org/z/754Men</a>.

Prior to 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c:

# LPM
$ clang -fno-experimental-new-pass-manager --target=powerpc64-linux-gnu -O2
-Wno-pointer-sign -Wframe-larger-than=512 -c -o /dev/null book3s_hv_nested.i
book3s_hv_nested.i:63:6: warning: stack frame size of 1136 bytes in function
'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu_arch *vcpu) {
     ^
1 warning generated.

# NPM
$ clang -fexperimental-new-pass-manager --target=powerpc64-linux-gnu -O2
-Wno-pointer-sign -Wframe-larger-than=512 -c -o /dev/null book3s_hv_nested.i
book3s_hv_nested.i:63:6: warning: stack frame size of 640 bytes in function
'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu_arch *vcpu) {
     ^
1 warning generated.

After 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c:

# LPM
$ clang -fno-experimental-new-pass-manager --target=powerpc64-linux-gnu -O2
-Wno-pointer-sign -Wframe-larger-than=512 -c -o /dev/null book3s_hv_nested.i
book3s_hv_nested.i:54:6: warning: stack frame size of 944 bytes in function
'byteswap_pt_regs' [-Wframe-larger-than=]
void byteswap_pt_regs(struct pt_regs *regs) {
     ^
book3s_hv_nested.i:63:6: warning: stack frame size of 2064 bytes in function
'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu_arch *vcpu) {
     ^
2 warnings generated.

# NPM
$ clang -fexperimental-new-pass-manager --target=powerpc64-linux-gnu -O2
-Wno-pointer-sign -Wframe-larger-than=512 -c -o /dev/null book3s_hv_nested.i
book3s_hv_nested.i:54:6: warning: stack frame size of 944 bytes in function
'byteswap_pt_regs' [-Wframe-larger-than=]
void byteswap_pt_regs(struct pt_regs *regs) {
     ^
book3s_hv_nested.i:63:6: warning: stack frame size of 1520 bytes in function
'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu_arch *vcpu) {
     ^
2 warnings generated.

Arnd Bergmann points out that this is most likely because PowerPC does not
appear to be using optimal assembly for byte swapping (or at least that
improving it would help), which I believe should be visible from the Godbolt
link above.</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>