[llvm] r273751 - AMDGPU: Define a schedule class for COPY.
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 24 16:52:11 PDT 2016
Author: matze
Date: Fri Jun 24 18:52:11 2016
New Revision: 273751
URL: http://llvm.org/viewvc/llvm-project?rev=273751&view=rev
Log:
AMDGPU: Define a schedule class for COPY.
COPY was lacking a scheduling class; define one to avoid regressions in
the upcoming change to the bidirectional MachineScheduler. Approved by
tstellar on IRC.
Differential Revision: http://reviews.llvm.org/D21540
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SISchedule.td
llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll
llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll
llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll
llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Fri Jun 24 18:52:11 2016
@@ -349,6 +349,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
+ bool isVGPRCopy(const MachineInstr &MI) const {
+ assert(MI.isCopy());
+ unsigned Dest = MI.getOperand(0).getReg();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return !RI.isSGPRReg(MRI, Dest);
+ }
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SISchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SISchedule.td?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SISchedule.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SISchedule.td Fri Jun 24 18:52:11 2016
@@ -11,6 +11,12 @@
//
//===----------------------------------------------------------------------===//
+def : PredicateProlog<[{
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
def WriteBranch : SchedWrite;
def WriteExport : SchedWrite;
def WriteLDS : SchedWrite;
@@ -96,6 +102,12 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}
+def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
+def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
+def WriteCopy : SchedWriteVariant<[
+ SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
+ SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
+ SchedVar<NoSchedPred, [WriteSALU]>]>;
let SchedModel = SIFullSpeedModel in {
@@ -105,6 +117,8 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
} // End SchedModel = SIFullSpeedModel
let SchedModel = SIQuarterSpeedModel in {
@@ -115,4 +129,6 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
} // End SchedModel = SIQuarterSpeedModel
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll Fri Jun 24 18:52:11 2016
@@ -136,7 +136,8 @@ define void @s_ctlz_i64_trunc(i32 addrsp
}
; FUNC-LABEL: {{^}}v_ctlz_i64:
-; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
@@ -145,7 +146,6 @@ define void @s_ctlz_i64_trunc(i32 addrsp
; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
-; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll Fri Jun 24 18:52:11 2016
@@ -116,7 +116,7 @@ define void @s_ctlz_zero_undef_i64_trunc
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64:
-; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll Fri Jun 24 18:52:11 2016
@@ -145,7 +145,7 @@ endif:
; FUNC-LABEL: {{^}}s_ctpop_i128:
; GCN: s_bcnt1_i32_b64 [[SRESULT0:s[0-9]+]],
; GCN: s_bcnt1_i32_b64 [[SRESULT1:s[0-9]+]],
-; GCN: s_add_i32 s{{[0-9]+}}, [[SRESULT0]], [[SRESULT1]]
+; GCN: s_add_i32 s{{[0-9]+}}, [[SRESULT1]], [[SRESULT0]]
; GCN: s_endpgm
define void @s_ctpop_i128(i32 addrspace(1)* noalias %out, i128 %val) nounwind {
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
Modified: llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll Fri Jun 24 18:52:11 2016
@@ -25,8 +25,8 @@ define void @v_ftrunc_f64(double addrspa
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01
-; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]]
+; SI-DAG: s_add_i32 [[SEXP1:s[0-9]+]], [[SEXP]], 0xfffffc01
+; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP1]]
; SI-DAG: s_not_b64
; SI-DAG: s_and_b64
; SI-DAG: cmp_gt_i32
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll Fri Jun 24 18:52:11 2016
@@ -56,9 +56,9 @@ entry:
}
; FUNC-LABEL: {{^}}local_load_v16i32:
-; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4{{$}}
-; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:5{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:5 offset1:6{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory-two-objects.ll Fri Jun 24 18:52:11 2016
@@ -32,8 +32,7 @@
; GCN: v_lshlrev_b32_e32 [[ADDRW:v[0-9]+]], 2, v0
-; CI-DAG: ds_write_b32 [[ADDRW]], {{v[0-9]*}} offset:16
-; CI-DAG: ds_write_b32 [[ADDRW]], {{v[0-9]*$}}
+; CI-DAG: ds_write2_b32 [[ADDRW]], {{v[0-9]*}}, {{v[0-9]+}} offset0:4
; SI: v_add_i32_e32 [[ADDRW_OFF:v[0-9]+]], vcc, 16, [[ADDRW]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll Fri Jun 24 18:52:11 2016
@@ -5,13 +5,13 @@
; FIXME: Due to changes in the load clustering heuristics. We no longer
; cluster all argument loads together on SI.
; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
; VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
-; VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
store i32 %x, i32 addrspace(1)* %out0, align 4
store i32 %y, i32 addrspace(1)* %out1, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll?rev=273751&r1=273750&r2=273751&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll Fri Jun 24 18:52:11 2016
@@ -57,8 +57,8 @@ define void @shl_2_add_999_i32(i32 addrs
; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
-; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
@@ -74,8 +74,8 @@ define void @test_add_shl_add_constant(i
; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]]
-; SI: s_addk_i32 [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
More information about the llvm-commits mailing list