[llvm] 0a6d797 - [X86] Improve F16C CVT schedules on SNB/HSW/BDW

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 24 09:32:02 PST 2024


Author: Simon Pilgrim
Date: 2024-11-24T17:04:53Z
New Revision: 0a6d797c20f6ab53bc09fb66129f603ed6e4b524

URL: https://github.com/llvm/llvm-project/commit/0a6d797c20f6ab53bc09fb66129f603ed6e4b524
DIFF: https://github.com/llvm/llvm-project/commit/0a6d797c20f6ab53bc09fb66129f603ed6e4b524.diff

LOG: [X86] Improve F16C CVT schedules on SNB/HSW/BDW

Add a complete IvyBridge schedule for the F16C conversions (IvyBridge was the first CPU to support F16C and is covered by the SandyBridge model), and split the rr/rm schedules, as they usually have very different port usage.

On Haswell/Broadwell, VCVTPH2PS uses Port1, not Port0.

Confirmed by comparing against a mixture of Agner Fog's instruction tables and uops.info data.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/lib/Target/X86/X86SchedSandyBridge.td
    llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
    llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
    llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
    llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index e5b3cc4b6c90e6..5b50e1943e3db1 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -393,11 +393,11 @@ defm : BWWriteResPair<WriteCvtPD2PS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
 defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
 
-defm : X86WriteRes<WriteCvtPH2PS,     [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY,    [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PS,     [BWPort1,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [BWPort1,BWPort5], 2, [1,1], 2>;
 defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PSLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd,  [BWPort1,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort1,BWPort23], 6, [1,1], 2>;
 defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
 
 defm : X86WriteRes<WriteCvtPS2PH,    [BWPort1,BWPort5], 4, [1,1], 2>;

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 59874be34f5a28..d06e8a99370976 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -393,12 +393,12 @@ defm : HWWriteResPair<WriteCvtPD2PS,  [HWPort1,HWPort5], 4, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
 
-defm : X86WriteRes<WriteCvtPH2PS,     [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY,    [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSZ,    [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
-defm : X86WriteRes<WriteCvtPH2PSLd,  [HWPort0,HWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PS,     [HWPort1,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [HWPort1,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ,    [HWPort1,HWPort5], 2, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PSLd,  [HWPort1,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort1,HWPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort1,HWPort23], 7, [1,1], 2>; // Unsupported = 1
 
 defm : X86WriteRes<WriteCvtPS2PH,    [HWPort1,HWPort5], 4, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PHY,   [HWPort1,HWPort5], 6, [1,1], 2>;

diff  --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 6939b1227d0a61..775ad6b1078a53 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -361,16 +361,20 @@ defm : SBWriteResPair<WriteCvtPD2PS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
 defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
 
-defm : SBWriteResPair<WriteCvtPH2PS,  [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
-
-defm : X86WriteRes<WriteCvtPS2PH,    [SBPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHY,   [SBPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHZ,   [SBPort1], 3, [1], 1>; // Unsupported = 1
-defm : X86WriteRes<WriteCvtPS2PHSt,  [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
+// F16C Instructions (IvyBridge+)
+defm : X86WriteRes<WriteCvtPH2PS,     [SBPort0,SBPort5], 3, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [SBPort0,SBPort5], 3, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ,    [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PSLd,  [SBPort0,SBPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1
+
+defm : X86WriteRes<WriteCvtPS2PH,    [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHY,   [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHZ,   [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PHSt,  [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1
 
 // Vector integer operations.
 defm : X86WriteRes<WriteVecLoad,         [SBPort23], 5, [1], 1>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
index 9fcd03bfb2fd45..07870d92dac555 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
@@ -45,14 +45,14 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     4.00   4.00   1.67   1.67   2.00   4.00    -     0.67
+# CHECK-NEXT:  -      -      -     8.00   1.67   1.67   2.00   4.00    -     0.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtps2ph	$0, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.33   0.33   1.00    -      -     0.33   vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtps2ph	$0, %ymm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
index 7dea75f8f8fec0..4abcd6fc516b79 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
@@ -22,14 +22,14 @@ vcvtps2ph   $0, %ymm0, (%rax)
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      3     1.00                        vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  2      3     1.00                        vcvtph2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtph2ps	(%rax), %ymm2
-# CHECK-NEXT:  1      3     1.00                        vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00           *            vcvtps2ph	$0, %xmm0, (%rax)
-# CHECK-NEXT:  1      3     1.00                        vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  1      4     1.00           *            vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  2      3     1.00                        vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  3      8     1.00    *                   vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  3      10    1.00                        vcvtps2ph	$0, %xmm0, %xmm2
+# CHECK-NEXT:  4      13    1.00           *            vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  3      10    1.00                        vcvtps2ph	$0, %ymm0, %xmm2
+# CHECK-NEXT:  4      13    1.00           *            vcvtps2ph	$0, %ymm0, (%rax)
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -      -     8.00   2.00    -     2.00   2.00
+# CHECK-NEXT:  -      -     8.00   4.00   2.00   5.00   2.00   2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtph2ps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcvtph2ps	(%rax), %ymm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %xmm0, (%rax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vcvtph2ps	(%rax), %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     vcvtps2ph	$0, %xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     vcvtps2ph	$0, %ymm0, %xmm2
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %ymm0, (%rax)

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
index 538ecf99074eda..d1fb824fee23db 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
@@ -45,14 +45,14 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     4.00   4.00   1.67   1.67   2.00   6.00    -     0.67
+# CHECK-NEXT:  -      -      -     8.00   1.67   1.67   2.00   6.00    -     0.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtps2ph	$0, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.33   0.33   1.00   1.00    -     0.33   vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtps2ph	$0, %ymm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
index a2ec86e8724faa..9284810b9e73be 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
@@ -22,14 +22,14 @@ vcvtps2ph   $0, %ymm0, (%rax)
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      3     1.00                        vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  2      3     1.00                        vcvtph2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtph2ps	(%rax), %ymm2
-# CHECK-NEXT:  1      3     1.00                        vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00           *            vcvtps2ph	$0, %xmm0, (%rax)
-# CHECK-NEXT:  1      3     1.00                        vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  1      4     1.00           *            vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  2      3     1.00                        vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  3      8     1.00    *                   vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  3      10    1.00                        vcvtps2ph	$0, %xmm0, %xmm2
+# CHECK-NEXT:  4      13    1.00           *            vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  3      10    1.00                        vcvtps2ph	$0, %ymm0, %xmm2
+# CHECK-NEXT:  4      13    1.00           *            vcvtps2ph	$0, %ymm0, (%rax)
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -      -     8.00   2.00    -     2.00   2.00
+# CHECK-NEXT:  -      -     8.00   4.00   2.00   5.00   2.00   2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtph2ps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcvtph2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcvtph2ps	(%rax), %ymm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %xmm0, (%rax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vcvtph2ps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vcvtph2ps	(%rax), %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vcvtph2ps	%xmm0, %ymm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     vcvtps2ph	$0, %xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     vcvtps2ph	$0, %ymm0, %xmm2
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vcvtps2ph	$0, %ymm0, (%rax)


        


More information about the llvm-commits mailing list