[PATCH] D68563: [AMDGPU] Disable a test that was relying on misched behavior
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 03:33:04 PDT 2019
foad added a comment.
In D68563#1698059 <https://reviews.llvm.org/D68563#1698059>, @arsenm wrote:
> I'm curious what the scheduler is able to do here? Everything is volatile and non-reorderable
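It hoists most of the SGPR-to-VGPR copies above the INLINEASM, interleaving them with the loads; the volatile loads and stores themselves keep their relative order: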
# *** IR Dump Before Machine Instruction Scheduler ***:
# Machine code for function max_9_sgprs: NoPHIs, TracksLiveness
0B bb.0 (%ir-block.0):
16B %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %13:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
32B %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %15:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
48B %6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %17:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
64B %7:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %19:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
80B %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %21:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
96B %9:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %23:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
112B %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %25:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
128B %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %27:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
144B %28:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %29:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
160B %30:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %31:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
176B INLINEASM &"" [sideeffect] [attdialect], $0:[reguse:SReg_32_XM0], %4:sreg_32_xm0_xexec, $1:[reguse:SReg_32_XM0], %5:sreg_32_xm0_xexec, $2:[reguse:SReg_32_XM0], %6:sreg_32_xm0_xexec, $3:[reguse:SReg_32_XM0], %7:sreg_32_xm0_xexec, $4:[reguse:SReg_32_XM0], %8:sreg_32_xm0_xexec, $5:[reguse:SReg_32_XM0], %9:sreg_32_xm0_xexec, $6:[reguse:SReg_32_XM0], %10:sreg_32_xm0_xexec, $7:[reguse:SReg_32_XM0], %11:sreg_32_xm0_xexec
192B %34:vgpr_32 = COPY %4:sreg_32_xm0_xexec
208B FLAT_STORE_DWORD undef %33:vreg_64, %34:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
224B %37:vgpr_32 = COPY %5:sreg_32_xm0_xexec
240B FLAT_STORE_DWORD undef %36:vreg_64, %37:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
256B %40:vgpr_32 = COPY %6:sreg_32_xm0_xexec
272B FLAT_STORE_DWORD undef %39:vreg_64, %40:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
288B %43:vgpr_32 = COPY %7:sreg_32_xm0_xexec
304B FLAT_STORE_DWORD undef %42:vreg_64, %43:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
320B %46:vgpr_32 = COPY %8:sreg_32_xm0_xexec
336B FLAT_STORE_DWORD undef %45:vreg_64, %46:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
352B %49:vgpr_32 = COPY %9:sreg_32_xm0_xexec
368B FLAT_STORE_DWORD undef %48:vreg_64, %49:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
384B %52:vgpr_32 = COPY %10:sreg_32_xm0_xexec
400B FLAT_STORE_DWORD undef %51:vreg_64, %52:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
416B %55:vgpr_32 = COPY %11:sreg_32_xm0_xexec
432B FLAT_STORE_DWORD undef %54:vreg_64, %55:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
448B %58:vgpr_32 = COPY %28:sreg_32_xm0_xexec
464B FLAT_STORE_DWORD undef %57:vreg_64, %58:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
480B %61:vgpr_32 = COPY %30:sreg_32_xm0_xexec
496B FLAT_STORE_DWORD undef %60:vreg_64, %61:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
512B S_ENDPGM 0
# End machine code for function max_9_sgprs.
# *** IR Dump After Machine Instruction Scheduler ***:
# Machine code for function max_9_sgprs: NoPHIs, TracksLiveness
0B bb.0 (%ir-block.0):
16B %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %13:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
32B %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %15:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
48B %6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %17:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
64B %7:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %19:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
72B %34:vgpr_32 = COPY %4:sreg_32_xm0_xexec
76B %37:vgpr_32 = COPY %5:sreg_32_xm0_xexec
84B %40:vgpr_32 = COPY %6:sreg_32_xm0_xexec
88B %43:vgpr_32 = COPY %7:sreg_32_xm0_xexec
92B %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %21:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
100B %46:vgpr_32 = COPY %8:sreg_32_xm0_xexec
108B %9:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %23:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
116B %49:vgpr_32 = COPY %9:sreg_32_xm0_xexec
124B %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %25:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
132B %52:vgpr_32 = COPY %10:sreg_32_xm0_xexec
140B %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %27:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
148B %55:vgpr_32 = COPY %11:sreg_32_xm0_xexec
156B %28:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %29:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
164B %58:vgpr_32 = COPY %28:sreg_32_xm0_xexec
172B %30:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %31:sreg_64, 0, 0, 0 :: (volatile load 4 from `i32 addrspace(4)* undef`, addrspace 4)
176B INLINEASM &"" [sideeffect] [attdialect], $0:[reguse:SReg_32_XM0], %4:sreg_32_xm0_xexec, $1:[reguse:SReg_32_XM0], %5:sreg_32_xm0_xexec, $2:[reguse:SReg_32_XM0], %6:sreg_32_xm0_xexec, $3:[reguse:SReg_32_XM0], %7:sreg_32_xm0_xexec, $4:[reguse:SReg_32_XM0], %8:sreg_32_xm0_xexec, $5:[reguse:SReg_32_XM0], %9:sreg_32_xm0_xexec, $6:[reguse:SReg_32_XM0], %10:sreg_32_xm0_xexec, $7:[reguse:SReg_32_XM0], %11:sreg_32_xm0_xexec
208B FLAT_STORE_DWORD undef %33:vreg_64, %34:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
240B FLAT_STORE_DWORD undef %36:vreg_64, %37:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
272B FLAT_STORE_DWORD undef %39:vreg_64, %40:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
304B FLAT_STORE_DWORD undef %42:vreg_64, %43:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
336B FLAT_STORE_DWORD undef %45:vreg_64, %46:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
368B FLAT_STORE_DWORD undef %48:vreg_64, %49:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
400B FLAT_STORE_DWORD undef %51:vreg_64, %52:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
432B FLAT_STORE_DWORD undef %54:vreg_64, %55:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
464B FLAT_STORE_DWORD undef %57:vreg_64, %58:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
480B %61:vgpr_32 = COPY %30:sreg_32_xm0_xexec
496B FLAT_STORE_DWORD undef %60:vreg_64, %61:vgpr_32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
512B S_ENDPGM 0
# End machine code for function max_9_sgprs.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D68563/new/
https://reviews.llvm.org/D68563
More information about the llvm-commits mailing list