[llvm] r264877 - AMDGPU/SI: Improve MachineSchedModel definition
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 30 09:35:14 PDT 2016
Author: tstellar
Date: Wed Mar 30 11:35:13 2016
New Revision: 264877
URL: http://llvm.org/viewvc/llvm-project?rev=264877&view=rev
Log:
AMDGPU/SI: Improve MachineSchedModel definition
This patch contains a few improvements to the model, including:
- Using a single resource with a defined buffers size for each memory unit.
- Setting the IssueWidth correctly.
- Fixing latency values for memory instructions.
shader-db stats:
16429 shaders in 3231 tests
Totals:
SGPRS: 318232 -> 312328 (-1.86 %)
VGPRS: 208996 -> 209346 (0.17 %)
Code Size: 7147044 -> 7166440 (0.27 %) bytes
LDS: 83 -> 83 (0.00 %) blocks
Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave
Max Waves: 49182 -> 49243 (0.12 %)
Wait states: 0 -> 0 (0.00 %)A
Differential Revision: http://reviews.llvm.org/D18453
Modified:
llvm/trunk/lib/Target/AMDGPU/SISchedule.td
llvm/trunk/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll
llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll
llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SISchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SISchedule.td?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SISchedule.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SISchedule.td Wed Mar 30 11:35:13 2016
@@ -39,24 +39,32 @@ def Write64Bit : SchedWrite;
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)
-def SIFullSpeedModel : SchedMachineModel {
- let CompleteModel = 0;
-}
-def SIQuarterSpeedModel : SchedMachineModel {
+class SISchedMachineModel : SchedMachineModel {
let CompleteModel = 0;
+ let IssueWidth = 1;
}
-// BufferSize = 0 means the processors are in-order.
-let BufferSize = 0 in {
+def SIFullSpeedModel : SISchedMachineModel;
+def SIQuarterSpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
-def HWBranch : ProcResource<1>;
-def HWExport : ProcResource<7>; // Taken from S_WAITCNT
-def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
-def HWSALU : ProcResource<1>;
-def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
-def HWVALU : ProcResource<1>;
-
+def HWBranch : ProcResource<1> {
+ let BufferSize = 1;
+}
+def HWExport : ProcResource<1> {
+ let BufferSize = 7; // Taken from S_WAITCNT
+}
+def HWLGKM : ProcResource<1> {
+ let BufferSize = 31; // Taken from S_WAITCNT
+}
+def HWSALU : ProcResource<1> {
+ let BufferSize = 1;
+}
+def HWVMEM : ProcResource<1> {
+ let BufferSize = 15; // Taken from S_WAITCNT
+}
+def HWVALU : ProcResource<1> {
+ let BufferSize = 1;
}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
@@ -74,12 +82,12 @@ class HWVALUWriteRes<SchedWrite write, i
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
- def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
- def : HWWriteRes<WriteSALU, [HWSALU], 1>;
- def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
- def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+ def : HWWriteRes<WriteBranch, [HWBranch], 8>;
+ def : HWWriteRes<WriteExport, [HWExport], 4>;
+ def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
+ def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+ def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;
+ def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;
def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
def : HWVALUWriteRes<Write32Bit, 1>;
Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_read2_offset_order.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_read2_offset_order.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_read2_offset_order.ll Wed Mar 30 11:35:13 2016
@@ -8,9 +8,9 @@
; SI-LABEL: {{^}}offset_order:
-; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset1:4{{$}}
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:3 offset1:2
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:12 offset1:14
+; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:4{{$}}
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:14 offset1:12
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:44
define void @offset_order(float addrspace(1)* %out) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll Wed Mar 30 11:35:13 2016
@@ -12,11 +12,11 @@ declare <16 x double> @llvm.ceil.v16f64(
; FUNC-LABEL: {{^}}fceil_f64:
; CI: v_ceil_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: s_lshr_b64
-; SI: s_not_b64
-; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01
+; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]]
+; SI-DAG: s_not_b64
+; SI-DAG: s_and_b64
; SI-DAG: cmp_gt_i32
; SI-DAG: cndmask_b32
; SI-DAG: cndmask_b32
Modified: llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll Wed Mar 30 11:35:13 2016
@@ -24,11 +24,11 @@ define void @v_ftrunc_f64(double addrspa
; CI: v_trunc_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: s_lshr_b64
-; SI: s_not_b64
-; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01
+; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]]
+; SI-DAG: s_not_b64
+; SI-DAG: s_and_b64
; SI-DAG: cmp_gt_i32
; SI-DAG: cndmask_b32
; SI-DAG: cndmask_b32
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll Wed Mar 30 11:35:13 2016
@@ -6,13 +6,13 @@ declare double @llvm.AMDGPU.rsq.clamped.
; FUNC-LABEL: {{^}}rsq_clamped_f64:
; SI: v_rsq_clamp_f64_e32
-; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
+; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}]
; TODO: this constant should be folded:
-; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
-; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
-; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
+; VI-DAG: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
+; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
+; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
+; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
-; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll Wed Mar 30 11:35:13 2016
@@ -24,13 +24,13 @@ define void @rsq_clamp_f32(float addrspa
; FUNC-LABEL: {{^}}rsq_clamp_f64:
; SI: v_rsq_clamp_f64_e32
-; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
; TODO: this constant should be folded:
-; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
-; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
-; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
+; VI-DAG: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
+; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
+; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
+; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
+; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
-; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.memcpy.ll Wed Mar 30 11:35:13 2016
@@ -6,77 +6,77 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+; SI-DAG: ds_read_u8
+
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
+; SI-DAG: ds_write_b8
; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
@@ -87,41 +87,41 @@ define void @test_small_memcpy_i64_lds_t
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+; SI-DAG: ds_read_u16
+
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
+; SI-DAG: ds_write_b16
; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll Wed Mar 30 11:35:13 2016
@@ -32,8 +32,8 @@
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
+; CI-DAG: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
+; CI-DAG: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Wed Mar 30 11:35:13 2016
@@ -156,9 +156,11 @@ define void @reorder_global_load_local_s
}
; FUNC-LABEL: @reorder_local_offsets
+; FIXME: The scheduler doesn't think its proftible to re-order the
+; loads and stores, and I'm not sure that it really is.
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
; CI: buffer_store_dword
Modified: llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll?rev=264877&r1=264876&r2=264877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll Wed Mar 30 11:35:13 2016
@@ -107,50 +107,54 @@ define void @test_udivrem(i32 addrspace(
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
+; For SI, we used to have checks for the input and output registers
+; of the instructions, but these are way too fragile. The division for
+; the two vector elements can be intermixed which makes it impossible to
+; accurately check all the operands.
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
More information about the llvm-commits
mailing list