[llvm] r296722 - LiveRegMatrix: Fix some subreg interference checks
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 1 16:35:09 PST 2017
Author: matze
Date: Wed Mar 1 18:35:08 2017
New Revision: 296722
URL: http://llvm.org/viewvc/llvm-project?rev=296722&view=rev
Log:
LiveRegMatrix: Fix some subreg interference checks
Surprisingly, one of the three interference checks in LiveRegMatrix was
using the main live range instead of the apropriate subregister range
resulting in unnecessarily conservative results.
Added:
llvm/trunk/test/CodeGen/AMDGPU/subreg_interference.mir
Modified:
llvm/trunk/include/llvm/CodeGen/LiveRegMatrix.h
llvm/trunk/lib/CodeGen/LiveRegMatrix.cpp
llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
Modified: llvm/trunk/include/llvm/CodeGen/LiveRegMatrix.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveRegMatrix.h?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveRegMatrix.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveRegMatrix.h Wed Mar 1 18:35:08 2017
@@ -141,7 +141,7 @@ public:
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
/// This returns a reference to an internal Query data structure that is only
/// valid until the next query() call.
- LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
+ LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit);
/// Directly access the live interval unions per regunit.
/// This returns an array indexed by the regunit number.
Modified: llvm/trunk/lib/CodeGen/LiveRegMatrix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveRegMatrix.cpp?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveRegMatrix.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveRegMatrix.cpp Wed Mar 1 18:35:08 2017
@@ -175,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterfer
return Result;
}
-LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
unsigned RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
- Q.init(UserTag, VirtReg, Matrix[RegUnit]);
+ Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
@@ -196,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInt
return IK_RegUnit;
// Check the matrix for virtual register interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (query(VirtReg, *Units).checkInterference())
- return IK_VirtReg;
+ bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
+ if (Interference)
+ return IK_VirtReg;
return IK_Free;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll Wed Mar 1 18:35:08 2017
@@ -191,10 +191,10 @@ define void @v_test_add_v2i16_zext_to_v2
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@@ -203,9 +203,9 @@ define void @v_test_add_v2i16_zext_to_v2
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: v_add_u16_e32
; VI: v_add_u16_e32
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: buffer_store_dwordx4
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll Wed Mar 1 18:35:08 2017
@@ -5,20 +5,19 @@
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
-; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
+; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
-; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; SI: s_branch
-; VI: buffer_store_short
-; VI: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]]
+; GCN: buffer_store_short
+; GCN: s_endpgm
; GCN: two{{$}}
-; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
+; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[B_F16]]
; GCN: s_endpgm
define void @br_cc_f16(
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll Wed Mar 1 18:35:08 2017
@@ -1195,9 +1195,8 @@ define void @v_fneg_fp_round_fneg_f64_to
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
-; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]]
; GCN: buffer_store_dword [[RESULT]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll Wed Mar 1 18:35:08 2017
@@ -417,10 +417,10 @@ define void @v_insertelement_v2i16_dynam
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
+; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
-; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
+; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
+; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -444,10 +444,10 @@ define void @v_insertelement_v2i16_dynam
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
+; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
-; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
+; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
+; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll?rev=296722&r1=296721&r2=296722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll Wed Mar 1 18:35:08 2017
@@ -188,10 +188,10 @@ define void @v_test_sub_v2i16_zext_to_v2
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@@ -199,10 +199,10 @@ define void @v_test_sub_v2i16_zext_to_v2
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: v_subrev_u16_e32
-; VI: v_subrev_u16_e32
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_subrev_u16_e32
+; VI-DAG: v_subrev_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
Added: llvm/trunk/test/CodeGen/AMDGPU/subreg_interference.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg_interference.mir?rev=296722&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg_interference.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg_interference.mir Wed Mar 1 18:35:08 2017
@@ -0,0 +1,24 @@
+# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
+---
+# We should not detect any interference between v0/v1 here and only allocate
+# sgpr0-sgpr3.
+#
+# CHECK-LABEL: func0
+# CHECK: S_NOP 0, implicit-def %sgpr0
+# CHECK: S_NOP 0, implicit-def %sgpr3
+# CHECK: S_NOP 0, implicit-def %sgpr1
+# CHECK: S_NOP 0, implicit-def %sgpr2
+# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3
+# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2
+name: func0
+body: |
+ bb.0:
+ S_NOP 0, implicit-def undef %0.sub0 : sreg_128
+ S_NOP 0, implicit-def %0.sub3
+ S_NOP 0, implicit-def undef %1.sub1 : sreg_128
+ S_NOP 0, implicit-def %1.sub2
+
+
+ S_NOP 0, implicit %0.sub0, implicit %0.sub3
+ S_NOP 0, implicit %1.sub1, implicit %1.sub2
+...
More information about the llvm-commits
mailing list