[llvm] 948b262 - [AMDGPU] Add TransVALU to GFX11 scheduling model
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 14 03:25:38 PDT 2023
Author: Jay Foad
Date: 2023-08-14T11:16:19+01:00
New Revision: 948b2625b6415dcc64f71f9ecb8a39bd08b0e4c4
URL: https://github.com/llvm/llvm-project/commit/948b2625b6415dcc64f71f9ecb8a39bd08b0e4c4
DIFF: https://github.com/llvm/llvm-project/commit/948b2625b6415dcc64f71f9ecb8a39bd08b0e4c4.diff
LOG: [AMDGPU] Add TransVALU to GFX11 scheduling model
The GFX11 scheduling model should be identical to GFX10, except for a
few tweaks to latencies.
This GFX10 change from D100123 was missed when GFX11 was added.
Differential Revision: https://reviews.llvm.org/D157709
Added:
Modified:
llvm/lib/Target/AMDGPU/SISchedule.td
llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s
llvm/test/tools/llvm-mca/AMDGPU/gfx11-double.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 53441b5a4cedff..382e5c3b8480a5 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -315,17 +315,20 @@ def : InstRW<[WriteCopy], (instrs COPY)>;
let SchedModel = GFX11SpeedModel in {
+// The latency values are 1 / (operations / cycle).
+// Add 1 stall cycle for VGPR read.
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
-def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
-def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 40>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
@@ -334,6 +337,7 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s
index 64cf577e031a3f..571f1ef6c7ec0b 100644
--- a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s
+++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s
@@ -50,52 +50,29 @@ v_sqrt_f64 v[2:3], v[0:1]
# GFX11-NEXT: 1 40 1.00 U v_rsq_f64_e32 v[1:2], v[1:2]
# GFX11-NEXT: 1 40 1.00 U v_sqrt_f64_e32 v[2:3], v[0:1]
-# GFX10: Resources:
-# GFX10-NEXT: [0] - HWBranch
-# GFX10-NEXT: [1] - HWExport
-# GFX10-NEXT: [2] - HWLGKM
-# GFX10-NEXT: [3] - HWRC
-# GFX10-NEXT: [4] - HWSALU
-# GFX10-NEXT: [5] - HWTransVALU
-# GFX10-NEXT: [6] - HWVALU
-# GFX10-NEXT: [7] - HWVMEM
-
-# GFX11: Resources:
-# GFX11-NEXT: [0] - HWBranch
-# GFX11-NEXT: [1] - HWExport
-# GFX11-NEXT: [2] - HWLGKM
-# GFX11-NEXT: [3] - HWRC
-# GFX11-NEXT: [4] - HWSALU
-# GFX11-NEXT: [5] - HWVALU
-# GFX11-NEXT: [6] - HWVMEM
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWBranch
+# CHECK-NEXT: [1] - HWExport
+# CHECK-NEXT: [2] - HWLGKM
+# CHECK-NEXT: [3] - HWRC
+# CHECK-NEXT: [4] - HWSALU
+# CHECK-NEXT: [5] - HWTransVALU
+# CHECK-NEXT: [6] - HWVALU
+# CHECK-NEXT: [7] - HWVMEM
# CHECK: Resource pressure per iteration:
-
-# GFX10-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# GFX10-NEXT: - - - 7.00 - 7.00 3.00 -
-
-# GFX11-NEXT: [0] [1] [2] [3] [4] [5] [6]
-# GFX11-NEXT: - - - 7.00 - 7.00 -
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 7.00 - 7.00 3.00 -
# CHECK: Resource pressure by instruction:
-
-# GFX10-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
-# GFX10-NEXT: - - - 1.00 - 1.00 - - v_log_f32_e32 v0, v0
-# GFX10-NEXT: - - - 1.00 - 1.00 - - v_rcp_f32_e32 v0, v0
-# GFX10-NEXT: - - - 1.00 - 1.00 - - v_rsq_f32_e32 v1, v1
-# GFX10-NEXT: - - - 1.00 - 1.00 - - v_sqrt_f32_e32 v2, v0
-# GFX10-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
-# GFX10-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[1:2], v[1:2]
-# GFX10-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[2:3], v[0:1]
-
-# GFX11-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_log_f32_e32 v0, v0
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_rcp_f32_e32 v0, v0
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_rsq_f32_e32 v1, v1
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_sqrt_f32_e32 v2, v0
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_rsq_f64_e32 v[1:2], v[1:2]
-# GFX11-NEXT: - - - 1.00 - 1.00 - v_sqrt_f64_e32 v[2:3], v[0:1]
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_log_f32_e32 v0, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rcp_f32_e32 v0, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rsq_f32_e32 v1, v1
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_sqrt_f32_e32 v2, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[1:2], v[1:2]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[2:3], v[0:1]
# CHECK: Timeline view:
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx11-double.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx11-double.s
index 968fbf7a03d5d1..7516ee97818a0a 100644
--- a/llvm/test/tools/llvm-mca/AMDGPU/gfx11-double.s
+++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx11-double.s
@@ -41,12 +41,12 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 28
-# CHECK-NEXT: Total Cycles: 384
+# CHECK-NEXT: Total Cycles: 349
# CHECK-NEXT: Total uOps: 29
# CHECK: Dispatch Width: 1
# CHECK-NEXT: uOps Per Cycle: 0.08
-# CHECK-NEXT: IPC: 0.07
+# CHECK-NEXT: IPC: 0.08
# CHECK-NEXT: Block RThroughput: 29.0
# CHECK: Instruction Info:
@@ -93,76 +93,77 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK-NEXT: [2] - HWLGKM
# CHECK-NEXT: [3] - HWRC
# CHECK-NEXT: [4] - HWSALU
-# CHECK-NEXT: [5] - HWVALU
-# CHECK-NEXT: [6] - HWVMEM
+# CHECK-NEXT: [5] - HWTransVALU
+# CHECK-NEXT: [6] - HWVALU
+# CHECK-NEXT: [7] - HWVMEM
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
-# CHECK-NEXT: - - - 29.00 1.00 28.00 -
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 29.00 1.00 3.00 28.00 -
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_fract_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_floor_f64_e32 v[6:7], v[6:7]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: - - - 2.00 1.00 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rsq_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fract_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_floor_f64_e32 v[6:7], v[6:7]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: - - - 2.00 1.00 - 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5]
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123
-
-# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
-# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
-# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
-# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
-# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
-# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: [0,7] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_fract_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: [0,9] . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_trunc_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: [0,10] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_ceil_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: [0,11] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_rndne_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: [0,12] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_floor_f64_e32 v[6:7], v[6:7]
-# CHECK-NEXT: [0,13] . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: [0,14] . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_add_f64 v[2:3], v[2:3], v[2:3]
-# CHECK-NEXT: [0,15] . . . . . . . . . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mul_f64 v[4:5], v[4:5], v[4:5]
-# CHECK-NEXT: [0,16] . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_min_f64 v[6:7], v[6:7], v[6:7]
-# CHECK-NEXT: [0,17] . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_max_f64 v[8:9], v[8:9], v[8:9]
-# CHECK-NEXT: [0,18] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: [0,19] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: [0,20] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . v_ldexp_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: [0,21] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: [0,22] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_trig_preop_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: [0,23] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
-# CHECK-NEXT: [0,24] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
-# CHECK-NEXT: [0,25] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . v_rcp_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: [0,26] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. v_rsq_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: [0,27] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE v_sqrt_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
+# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
+# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
+# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
+# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: [0,7] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_fract_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0,9] . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_trunc_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: [0,10] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_ceil_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: [0,11] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_rndne_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0,12] . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_floor_f64_e32 v[6:7], v[6:7]
+# CHECK-NEXT: [0,13] . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,14] . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_add_f64 v[2:3], v[2:3], v[2:3]
+# CHECK-NEXT: [0,15] . . . . . . . . . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mul_f64 v[4:5], v[4:5], v[4:5]
+# CHECK-NEXT: [0,16] . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_min_f64 v[6:7], v[6:7], v[6:7]
+# CHECK-NEXT: [0,17] . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_max_f64 v[8:9], v[8:9], v[8:9]
+# CHECK-NEXT: [0,18] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,19] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,20] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . v_ldexp_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: [0,21] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,22] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_trig_preop_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: [0,23] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
+# CHECK-NEXT: [0,24] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . . v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
+# CHECK-NEXT: [0,25] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: [0,26] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. v_rsq_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: [0,27] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE v_sqrt_f64_e32 v[4:5], v[4:5]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
More information about the llvm-commits
mailing list