[llvm-branch-commits] [llvm] 20eced2 - [X86][SchedModel] Add missing ReadAdvance for some arithmetic ops (PR51318 and PR51322).

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 11 21:40:22 PDT 2021


Author: Andrea Di Biagio
Date: 2021-08-11T21:40:03-07:00
New Revision: 20eced2cb0130869379cb0a959300ea85bee1f38

URL: https://github.com/llvm/llvm-project/commit/20eced2cb0130869379cb0a959300ea85bee1f38
DIFF: https://github.com/llvm/llvm-project/commit/20eced2cb0130869379cb0a959300ea85bee1f38.diff

LOG: [X86][SchedModel] Add missing ReadAdvance for some arithmetic ops (PR51318 and PR51322).

This fixes a bug where implicit uses of EFLAGS were not marked as ReadAdvance in
the RM/MR variants of ADC/SBB (PR51318).

This also fixes the absence of ReadAdvance for the register operand of
RMW arithmetic instructions (PR51322).

Differential Revision: https://reviews.llvm.org/D107367

(cherry picked from commit 7a1a35a1d1ae2e69769505c9f39910067c53d53b)

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrArithmetic.td
    llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
    llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
    llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff526..ba00e7da81f99 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
         mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
     Sched<[sched.Folded, sched.ReadAfterFold]>;
 
+// BinOpRM_ImplicitUse - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+  : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
+    Sched<[sched.Folded, sched.ReadAfterFold,
+           // base, scale, index, offset, segment.
+           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+           // implicit register read.
+           sched.ReadAfterFold]>;
+
 // BinOpRM_F - Instructions like "cmp reg, [mem]".
 class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                 SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
 // BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
 class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                  SDNode opnode>
-  : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
+  : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
             [(set typeinfo.RegClass:$dst, EFLAGS,
             (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
                     EFLAGS))]>;
@@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                   SDNode opnode>
   : BinOpMR<opcode, mnemonic, typeinfo,
           [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
-           (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
+           (implicit EFLAGS)]>, Sched<[WriteALURMW,
+                                       // base, scale, index, offset, segment
+                                       ReadDefault, ReadDefault, ReadDefault,
+                                       ReadDefault, ReadDefault,
+                                       WriteALU.ReadAfterFold]>;  // reg
 
 // BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
 class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
   : BinOpMR<opcode, mnemonic, typeinfo,
             [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
                     addr:$dst),
-             (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+             (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+                                         // base, scale, index, offset, segment
+                                         ReadDefault, ReadDefault, ReadDefault,
+                                         ReadDefault, ReadDefault,
+                                         WriteALU.ReadAfterFold,    // reg
+                                         WriteALU.ReadAfterFold]>;  // EFLAGS
 
 // BinOpMR_F - Instructions like "cmp [mem], reg".
 class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,

diff  --git a/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
index 68537e4f76746..4d563adf0cbf6 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s
@@ -1,19 +1,16 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=4 -timeline < %s | FileCheck %s
 
-# FIXME: PR51318
-# Missing read-advance for the implicit use of register EFLAGS.
-
 adc 4(%rsp), %eax
 
 # CHECK:      Iterations:        4
 # CHECK-NEXT: Instructions:      4
-# CHECK-NEXT: Total Cycles:      19
+# CHECK-NEXT: Total Cycles:      10
 # CHECK-NEXT: Total uOps:        4
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.21
-# CHECK-NEXT: IPC:               0.21
+# CHECK-NEXT: uOps Per Cycle:    0.40
+# CHECK-NEXT: IPC:               0.40
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -52,13 +49,12 @@ adc 4(%rsp), %eax
 # CHECK-NEXT: 1.00   1.00    -      -      -      -      -     1.00    -      -      -      -      -      -     adcl	4(%rsp), %eax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012345678
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeER   .    .  .   adcl	4(%rsp), %eax
-# CHECK-NEXT: [1,0]     D====eeeeER    .  .   adcl	4(%rsp), %eax
-# CHECK-NEXT: [2,0]     .D=======eeeeER.  .   adcl	4(%rsp), %eax
-# CHECK-NEXT: [3,0]     .D===========eeeeER   adcl	4(%rsp), %eax
+# CHECK:      [0,0]     DeeeeER  .   adcl	4(%rsp), %eax
+# CHECK-NEXT: [1,0]     D=eeeeER .   adcl	4(%rsp), %eax
+# CHECK-NEXT: [2,0]     .D=eeeeER.   adcl	4(%rsp), %eax
+# CHECK-NEXT: [3,0]     .D==eeeeER   adcl	4(%rsp), %eax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -67,4 +63,4 @@ adc 4(%rsp), %eax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     4     6.5    0.3    0.0       adcl	4(%rsp), %eax
+# CHECK-NEXT: 0.     4     2.0    0.3    0.0       adcl	4(%rsp), %eax

diff  --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
index 16387c6c26e95..ef25a48f4a724 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s
@@ -1,20 +1,17 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
 
-# FIXME: PR51322
-# Missing read-advance for register EAX.
-
 add %eax, %eax
 adc %eax, 4(%rsp)
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total Cycles:      9
 # CHECK-NEXT: Total uOps:        2
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.20
-# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: uOps Per Cycle:    0.22
+# CHECK-NEXT: IPC:               0.22
 # CHECK-NEXT: Block RThroughput: 1.5
 
 # CHECK:      Instruction Info:
@@ -55,10 +52,10 @@ adc %eax, 4(%rsp)
 # CHECK-NEXT: 2.00    -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     adcl	%eax, 4(%rsp)
 
 # CHECK:      Timeline view:
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .   .   addl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eeeeeeER   adcl	%eax, 4(%rsp)
+# CHECK:      [0,0]     DeER .  .   addl	%eax, %eax
+# CHECK-NEXT: [0,1]     DeeeeeeER   adcl	%eax, 4(%rsp)
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ adc %eax, 4(%rsp)
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       addl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       adcl	%eax, 4(%rsp)
-# CHECK-NEXT:        1     1.5    0.5    0.0       <total>
+# CHECK-NEXT: 1.     1     1.0    0.0    0.0       adcl	%eax, 4(%rsp)
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

diff  --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
index 7ff8884da8522..a8da4515b315e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s
@@ -1,20 +1,17 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
 
-# FIXME: PR51322
-# Missing read-advance for register EAX.
-
 add %eax, %eax
 add %eax, 4(%rsp)
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total Cycles:      9
 # CHECK-NEXT: Total uOps:        2
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.20
-# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: uOps Per Cycle:    0.22
+# CHECK-NEXT: IPC:               0.22
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -55,10 +52,10 @@ add %eax, 4(%rsp)
 # CHECK-NEXT: 1.00    -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     addl	%eax, 4(%rsp)
 
 # CHECK:      Timeline view:
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .   .   addl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eeeeeeER   addl	%eax, 4(%rsp)
+# CHECK:      [0,0]     DeER .  .   addl	%eax, %eax
+# CHECK-NEXT: [0,1]     DeeeeeeER   addl	%eax, 4(%rsp)
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ add %eax, 4(%rsp)
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       addl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       addl	%eax, 4(%rsp)
-# CHECK-NEXT:        1     1.5    0.5    0.0       <total>
+# CHECK-NEXT: 1.     1     1.0    0.0    0.0       addl	%eax, 4(%rsp)
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>


        


More information about the llvm-branch-commits mailing list