[PATCH] D19401: MachineScheduler: Fully compare top/bottom candidates

Fri Apr 22 11:24:07 PDT 2016

tstellarAMD added a comment.

I'm going to test this patch to collect some register usage statistics, and I'll see whether I can fix the regressing tests.


================
Comment at: test/CodeGen/AMDGPU/ds_read2_offset_order.ll:11-13
@@ -10,6 +10,5 @@
 
-; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:4{{$}}
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:14 offset1:12
-; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:44
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset1:2
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:3 offset1:11
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:48
 
----------------
This change doesn't look right to me.  I need to look at this test more closely.

================
Comment at: test/CodeGen/AMDGPU/s_addk_i32.ll:6-7
@@ -5,4 +5,4 @@
 ; SI: s_load_dword [[VAL:s[0-9]+]]
-; SI: s_addk_i32 [[VAL]], 0x41
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
+; SI: s_add_i32 [[ADDRES:s[0-9]+]], [[VAL]], 0x41
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[ADDRES]]
 ; SI: buffer_store_dword [[VRESULT]]
----------------
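This is a regression: the add was previously emitted as `s_addk_i32` operating in place on [[VAL]], and it is now a three-operand `s_add_i32` into a separate result register. Since 0x41 is outside the inline-constant range, the `s_add_i32` form needs an extra 32-bit literal, so the encoding is larger.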

================
Comment at: test/CodeGen/AMDGPU/s_addk_i32.ll:31-50
@@ -30,22 +30,22 @@
 ; SI-LABEL: {{^}}s_addk_i32_k1:
-; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}}
+; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x7fff{{$}}
 ; SI: s_endpgm
 define void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
   %add = add i32 %b, 32767 ; (1 << 15) - 1
   store i32 %add, i32 addrspace(1)* %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_addk_i32_k2:
-; SI: s_addk_i32 {{s[0-9]+}}, 0xffef{{$}}
+; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0xffffffef
 ; SI: s_endpgm
 define void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
   %add = add i32 %b, -17
   store i32 %add, i32 addrspace(1)* %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_addk_v2i32_k0:
-; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
-; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
+; SI-DAG: s_add_i32 {{s[0-9+]}}, {{s[0-9]+}}, 0x41
+; SI-DAG: s_add_i32 {{s[0-9+]}}, {{s[0-9]+}}, 0x42
 ; SI: s_endpgm
----------------
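These are all regressions: every check that previously matched the compact `s_addk_i32` form now matches a three-operand `s_add_i32`. Also note that `{{s[0-9+]}}` in the two new SI-DAG lines looks like a typo for `{{s[0-9]+}}`; as written it only matches a single-character register number.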

================
Comment at: test/CodeGen/AMDGPU/shl_add_constant.ll:61-62
@@ -60,4 +60,4 @@
 ; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: s_addk_i32 [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
 ; SI: buffer_store_dword [[VRESULT]]
----------------
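This is an improvement: the constant add is now emitted as `s_addk_i32` updating [[TMP]] in place instead of an `s_add_i32` into a new [[RESULT]] register.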

================
Comment at: test/CodeGen/AMDGPU/shl_add_constant.ll:77-78
@@ -76,4 +76,4 @@
 ; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]]
-; SI: s_addk_i32 [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
+; SI: s_add_i32 [[TMP2:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP2]]
 ; SI: buffer_store_dword [[VRESULT]]
----------------
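This is a regression: the constant add was previously emitted as `s_addk_i32` updating [[TMP]] in place, and it is now an `s_add_i32` into a new [[TMP2]] register.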


Repository:
  rL LLVM

http://reviews.llvm.org/D19401