[PATCH] D68563: [AMDGPU] Disable a test that was relying on misched behavior

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 9 03:33:04 PDT 2019


foad added a comment.

In D68563#1698059 <https://reviews.llvm.org/D68563#1698059>, @arsenm wrote:

> I'm curious what the scheduler is able to do here? Everything is volatile and non-reorderable


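It moves a bunch of SGPR-to-VGPR copies: the volatile loads and stores keep their relative order, but all except the last COPY are hoisted above the INLINEASM and interleaved with the S_LOAD_DWORD_IMMs, as the before/after dumps show: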

  # *** IR Dump Before Machine Instruction Scheduler ***:
  # Machine code for function max_9_sgprs: NoPHIs, TracksLiveness
  
  0B      bb.0 (%ir-block.0):
  16B       %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %13:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  32B       %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %15:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  48B       %6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %17:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  64B       %7:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %19:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  80B       %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %21:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  96B       %9:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %23:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  112B      %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %25:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  128B      %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %27:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  144B      %28:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %29:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  160B      %30:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %31:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  176B      INLINEASM &"" [sideeffect] [attdialect], $0:[reguse:SReg_32_XM0], %4:sreg_32_xm0_xexec, $1:[reguse:SReg_32_XM0], %5:sreg_32_xm0_xexec, $2:[reguse:SReg_32_XM0], %6:sreg_32_xm0_xexec, $3:[reguse:SReg_32_XM0], %7:sreg_32_xm0_xexec, $4:[reguse:SReg_32_XM0], %8:sreg_32_xm0_xexec, $5:[reguse:SReg_32_XM0], %9:sreg_32_xm0_xexec, $6:[reguse:SReg_32_XM0], %10:sreg_32_xm0_xexec, $7:[reguse:SReg_32_XM0], %11:sreg_32_xm0_xexec
  192B      %34:vgpr_32 = COPY %4:sreg_32_xm0_xexec
  208B      FLAT_STORE_DWORD undef %33:vreg_64, %34:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  224B      %37:vgpr_32 = COPY %5:sreg_32_xm0_xexec
  240B      FLAT_STORE_DWORD undef %36:vreg_64, %37:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  256B      %40:vgpr_32 = COPY %6:sreg_32_xm0_xexec
  272B      FLAT_STORE_DWORD undef %39:vreg_64, %40:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  288B      %43:vgpr_32 = COPY %7:sreg_32_xm0_xexec
  304B      FLAT_STORE_DWORD undef %42:vreg_64, %43:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  320B      %46:vgpr_32 = COPY %8:sreg_32_xm0_xexec
  336B      FLAT_STORE_DWORD undef %45:vreg_64, %46:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  352B      %49:vgpr_32 = COPY %9:sreg_32_xm0_xexec
  368B      FLAT_STORE_DWORD undef %48:vreg_64, %49:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  384B      %52:vgpr_32 = COPY %10:sreg_32_xm0_xexec
  400B      FLAT_STORE_DWORD undef %51:vreg_64, %52:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  416B      %55:vgpr_32 = COPY %11:sreg_32_xm0_xexec
  432B      FLAT_STORE_DWORD undef %54:vreg_64, %55:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  448B      %58:vgpr_32 = COPY %28:sreg_32_xm0_xexec
  464B      FLAT_STORE_DWORD undef %57:vreg_64, %58:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  480B      %61:vgpr_32 = COPY %30:sreg_32_xm0_xexec
  496B      FLAT_STORE_DWORD undef %60:vreg_64, %61:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  512B      S_ENDPGM 0
  
  # End machine code for function max_9_sgprs.
  
  # *** IR Dump After Machine Instruction Scheduler ***:
  # Machine code for function max_9_sgprs: NoPHIs, TracksLiveness
  
  0B      bb.0 (%ir-block.0):
  16B       %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %13:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  32B       %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %15:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  48B       %6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %17:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  64B       %7:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %19:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  72B       %34:vgpr_32 = COPY %4:sreg_32_xm0_xexec
  76B       %37:vgpr_32 = COPY %5:sreg_32_xm0_xexec
  84B       %40:vgpr_32 = COPY %6:sreg_32_xm0_xexec
  88B       %43:vgpr_32 = COPY %7:sreg_32_xm0_xexec
  92B       %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %21:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  100B      %46:vgpr_32 = COPY %8:sreg_32_xm0_xexec
  108B      %9:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %23:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  116B      %49:vgpr_32 = COPY %9:sreg_32_xm0_xexec
  124B      %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %25:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  132B      %52:vgpr_32 = COPY %10:sreg_32_xm0_xexec
  140B      %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %27:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  148B      %55:vgpr_32 = COPY %11:sreg_32_xm0_xexec
  156B      %28:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %29:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  164B      %58:vgpr_32 = COPY %28:sreg_32_xm0_xexec
  172B      %30:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %31:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  176B      INLINEASM &"" [sideeffect] [attdialect], $0:[reguse:SReg_32_XM0], %4:sreg_32_xm0_xexec, $1:[reguse:SReg_32_XM0], %5:sreg_32_xm0_xexec, $2:[reguse:SReg_32_XM0], %6:sreg_32_xm0_xexec, $3:[reguse:SReg_32_XM0], %7:sreg_32_xm0_xexec, $4:[reguse:SReg_32_XM0], %8:sreg_32_xm0_xexec, $5:[reguse:SReg_32_XM0], %9:sreg_32_xm0_xexec, $6:[reguse:SReg_32_XM0], %10:sreg_32_xm0_xexec, $7:[reguse:SReg_32_XM0], %11:sreg_32_xm0_xexec
  208B      FLAT_STORE_DWORD undef %33:vreg_64, %34:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  240B      FLAT_STORE_DWORD undef %36:vreg_64, %37:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  272B      FLAT_STORE_DWORD undef %39:vreg_64, %40:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  304B      FLAT_STORE_DWORD undef %42:vreg_64, %43:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  336B      FLAT_STORE_DWORD undef %45:vreg_64, %46:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  368B      FLAT_STORE_DWORD undef %48:vreg_64, %49:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  400B      FLAT_STORE_DWORD undef %51:vreg_64, %52:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  432B      FLAT_STORE_DWORD undef %54:vreg_64, %55:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  464B      FLAT_STORE_DWORD undef %57:vreg_64, %58:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  480B      %61:vgpr_32 = COPY %30:sreg_32_xm0_xexec
  496B      FLAT_STORE_DWORD undef %60:vreg_64, %61:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  512B      S_ENDPGM 0
  
  # End machine code for function max_9_sgprs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68563/new/

https://reviews.llvm.org/D68563





More information about the llvm-commits mailing list