[llvm] r343708 - [X86] PUSH/POP 'mem-mem' instructions are not RMW - these are 2 different addresses
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 3 12:02:38 PDT 2018
Author: rksimon
Date: Wed Oct 3 12:02:38 2018
New Revision: 343708
URL: http://llvm.org/viewvc/llvm-project?rev=343708&view=rev
Log:
[X86] PUSH/POP 'mem-mem' instructions are not RMW - these are 2 different addresses
This patch adds a 'WriteCopy' schedule sequence ([WriteLoad, WriteStore]) and uses it instead of WriteRMW to better model the behaviour.
Found by @andreadb during llvm-mca testing on btver2, which was crashing on instructions whose only schedule write was the "zero uop" WriteRMW.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=343708&r1=343707&r2=343708&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Oct 3 12:02:38 2018
@@ -1210,12 +1210,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
OpSize32, Requires<[Not64BitMode]>;
-} // mayStore, mayLoad, WriteRMW
+} // mayStore, mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
@@ -1243,7 +1243,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs)
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
@@ -1302,7 +1302,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
OpSize32, Requires<[In64BitMode]>;
let mayStore = 1, SchedRW = [WriteStore] in {
@@ -1314,7 +1314,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (i
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
OpSize32, Requires<[In64BitMode]>;
} // mayLoad, mayStore, SchedRW
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=343708&r1=343707&r2=343708&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Wed Oct 3 12:02:38 2018
@@ -107,6 +107,7 @@ def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
+def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll?rev=343708&r1=343707&r2=343708&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll Wed Oct 3 12:02:38 2018
@@ -1676,9 +1676,9 @@ define i16 @test_pop_push_16(i16 %a0, i1
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
; SLM-NEXT: popw %ax # sched: [3:1.00]
-; SLM-NEXT: popw (%ecx) # sched: [1:1.00]
+; SLM-NEXT: popw (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushw %ax # sched: [1:1.00]
-; SLM-NEXT: pushw (%ecx) # sched: [1:1.00]
+; SLM-NEXT: pushw (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushw $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushw $7 # sched: [1:1.00]
@@ -1766,9 +1766,9 @@ define i16 @test_pop_push_16(i16 %a0, i1
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
-; BTVER2-NEXT: popw (%ecx) # sched: [1:1.00]
+; BTVER2-NEXT: popw (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushw %ax # sched: [1:1.00]
-; BTVER2-NEXT: pushw (%ecx) # sched: [1:1.00]
+; BTVER2-NEXT: pushw (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
@@ -1828,9 +1828,9 @@ define i32 @test_pop_push_32(i32 %a0, i3
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
; SLM-NEXT: popl %eax # sched: [3:1.00]
-; SLM-NEXT: popl (%ecx) # sched: [1:1.00]
+; SLM-NEXT: popl (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushl %eax # sched: [1:1.00]
-; SLM-NEXT: pushl (%ecx) # sched: [1:1.00]
+; SLM-NEXT: pushl (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushl $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushl $7 # sched: [1:1.00]
@@ -1918,9 +1918,9 @@ define i32 @test_pop_push_32(i32 %a0, i3
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popl %eax # sched: [5:1.00]
-; BTVER2-NEXT: popl (%ecx) # sched: [1:1.00]
+; BTVER2-NEXT: popl (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushl %eax # sched: [1:1.00]
-; BTVER2-NEXT: pushl (%ecx) # sched: [1:1.00]
+; BTVER2-NEXT: pushl (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushl $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushl $7 # sched: [1:1.00]
@@ -1933,7 +1933,7 @@ define i32 @test_pop_push_32(i32 %a0, i3
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popl %eax # sched: [8:0.50]
-; ZNVER1-NEXT: popl (%ecx) # sched: [1:0.50]
+; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00]
; ZNVER1-NEXT: pushl %eax # sched: [1:0.50]
; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50]
; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=343708&r1=343707&r2=343708&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Wed Oct 3 12:02:38 2018
@@ -9648,9 +9648,9 @@ define i16 @test_pop_push_16(i16 %a0, i1
; SLM: # %bb.0:
; SLM-NEXT: #APP
; SLM-NEXT: popw %ax # sched: [3:1.00]
-; SLM-NEXT: popw (%rsi) # sched: [1:1.00]
+; SLM-NEXT: popw (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushw %di # sched: [1:1.00]
-; SLM-NEXT: pushw (%rsi) # sched: [1:1.00]
+; SLM-NEXT: pushw (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushw $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushw $7 # sched: [1:1.00]
@@ -9726,9 +9726,9 @@ define i16 @test_pop_push_16(i16 %a0, i1
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
-; BTVER2-NEXT: popw (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: popw (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushw %di # sched: [1:1.00]
-; BTVER2-NEXT: pushw (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: pushw (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
@@ -9781,9 +9781,9 @@ define i64 @test_pop_push_64(i64 %a0, i6
; SLM: # %bb.0:
; SLM-NEXT: #APP
; SLM-NEXT: popq %rax # sched: [3:1.00]
-; SLM-NEXT: popq (%rsi) # sched: [1:1.00]
+; SLM-NEXT: popq (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushq %rdi # sched: [1:1.00]
-; SLM-NEXT: pushq (%rsi) # sched: [1:1.00]
+; SLM-NEXT: pushq (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushq $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushq $7 # sched: [1:1.00]
@@ -9859,9 +9859,9 @@ define i64 @test_pop_push_64(i64 %a0, i6
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popq %rax # sched: [5:1.00]
-; BTVER2-NEXT: popq (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: popq (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushq %rdi # sched: [1:1.00]
-; BTVER2-NEXT: pushq (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: pushq (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushq $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushq $7 # sched: [1:1.00]
@@ -9872,9 +9872,9 @@ define i64 @test_pop_push_64(i64 %a0, i6
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popq %rax # sched: [8:0.50]
-; ZNVER1-NEXT: popq (%rsi) # sched: [1:0.50]
+; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00]
; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50]
-; ZNVER1-NEXT: pushq (%rsi) # sched: [1:0.50]
+; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00]
; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF
; ZNVER1-NEXT: # sched: [1:0.50]
; ZNVER1-NEXT: pushq $7 # sched: [1:0.50]
More information about the llvm-commits mailing list