[llvm-branch-commits] [llvm] 20eced2 - [X86][SchedModel] Add missing ReadAdvance for some arithmetic ops (PR51318 and PR51322).
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 11 21:40:22 PDT 2021
Author: Andrea Di Biagio
Date: 2021-08-11T21:40:03-07:00
New Revision: 20eced2cb0130869379cb0a959300ea85bee1f38
URL: https://github.com/llvm/llvm-project/commit/20eced2cb0130869379cb0a959300ea85bee1f38
DIFF: https://github.com/llvm/llvm-project/commit/20eced2cb0130869379cb0a959300ea85bee1f38.diff
LOG: [X86][SchedModel] Add missing ReadAdvance for some arithmetic ops (PR51318 and PR51322).
This fixes a bug where implicit uses of EFLAGS were not marked as ReadAdvance in
the RM/MR variants of ADC/SBB (PR51318).
This also fixes the absence of ReadAdvance for the register operand of
RMW arithmetic instructions (PR51322).
Differential Revision: https://reviews.llvm.org/D107367
(cherry picked from commit 7a1a35a1d1ae2e69769505c9f39910067c53d53b)
Added:
Modified:
llvm/lib/Target/X86/X86InstrArithmetic.td
llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff526..ba00e7da81f99 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+// BinOpRM_ImplicitUse - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ sched.ReadAfterFold]>;
+
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
+ : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
EFLAGS))]>;
@@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>; // reg
// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>; // EFLAGS
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
index 68537e4f76746..4d563adf0cbf6 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
@@ -1,19 +1,16 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=4 -timeline < %s | FileCheck %s
-# FIXME: PR51318
-# Missing read-advance for the implicit use of register EFLAGS.
-
adc 4(%rsp), %eax
# CHECK: Iterations: 4
# CHECK-NEXT: Instructions: 4
-# CHECK-NEXT: Total Cycles: 19
+# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.21
-# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -52,13 +49,12 @@ adc 4(%rsp), %eax
# CHECK-NEXT: 1.00 1.00 - - - - - 1.00 - - - - - - adcl 4(%rsp), %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . . . adcl 4(%rsp), %eax
-# CHECK-NEXT: [1,0] D====eeeeER . . adcl 4(%rsp), %eax
-# CHECK-NEXT: [2,0] .D=======eeeeER. . adcl 4(%rsp), %eax
-# CHECK-NEXT: [3,0] .D===========eeeeER adcl 4(%rsp), %eax
+# CHECK: [0,0] DeeeeER . adcl 4(%rsp), %eax
+# CHECK-NEXT: [1,0] D=eeeeER . adcl 4(%rsp), %eax
+# CHECK-NEXT: [2,0] .D=eeeeER. adcl 4(%rsp), %eax
+# CHECK-NEXT: [3,0] .D==eeeeER adcl 4(%rsp), %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -67,4 +63,4 @@ adc 4(%rsp), %eax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 4 6.5 0.3 0.0 adcl 4(%rsp), %eax
+# CHECK-NEXT: 0. 4 2.0 0.3 0.0 adcl 4(%rsp), %eax
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
index 16387c6c26e95..ef25a48f4a724 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
@@ -1,20 +1,17 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
-# FIXME: PR51322
-# Missing read-advance for register EAX.
-
add %eax, %eax
adc %eax, 4(%rsp)
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 2
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.20
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.22
+# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
@@ -55,10 +52,10 @@ adc %eax, 4(%rsp)
# CHECK-NEXT: 2.00 - - - - - - 1.00 - 1.00 - - - - adcl %eax, 4(%rsp)
# CHECK: Timeline view:
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DeER . . addl %eax, %eax
-# CHECK-NEXT: [0,1] D=eeeeeeER adcl %eax, 4(%rsp)
+# CHECK: [0,0] DeER . . addl %eax, %eax
+# CHECK-NEXT: [0,1] DeeeeeeER adcl %eax, 4(%rsp)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ adc %eax, 4(%rsp)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addl %eax, %eax
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 adcl %eax, 4(%rsp)
-# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 adcl %eax, 4(%rsp)
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
index 7ff8884da8522..a8da4515b315e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
@@ -1,20 +1,17 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
-# FIXME: PR51322
-# Missing read-advance for register EAX.
-
add %eax, %eax
add %eax, 4(%rsp)
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 2
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.20
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.22
+# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -55,10 +52,10 @@ add %eax, 4(%rsp)
# CHECK-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addl %eax, 4(%rsp)
# CHECK: Timeline view:
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DeER . . addl %eax, %eax
-# CHECK-NEXT: [0,1] D=eeeeeeER addl %eax, 4(%rsp)
+# CHECK: [0,0] DeER . . addl %eax, %eax
+# CHECK-NEXT: [0,1] DeeeeeeER addl %eax, 4(%rsp)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ add %eax, 4(%rsp)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addl %eax, %eax
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 addl %eax, 4(%rsp)
-# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 addl %eax, 4(%rsp)
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
More information about the llvm-branch-commits
mailing list