[PATCH] D75392: [AMDGPU] Fix the gfx10 scheduling model for f32 conversions
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 15:49:29 PST 2020
foad created this revision.
foad added reviewers: rampitec, arsenm.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
As far as I can tell, on gfx10, conversions to/from f32 (other than
those converting between f32 and f64) are full-rate instructions, but
they were marked as quarter-rate instructions.
I have fixed this for gfx10 only. I assume the scheduling model was
correct for older architectures, though I don't have any documentation
handy to confirm that.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D75392
Files:
llvm/lib/Target/AMDGPU/SISchedule.td
llvm/lib/Target/AMDGPU/VOP1Instructions.td
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -190,7 +190,7 @@
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteDoubleCvt]
-let SchedRW = [WriteQuarterRate32] in {
+let SchedRW = [WriteFloatCvt] in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
@@ -202,7 +202,7 @@
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
-} // End SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteFloatCvt]
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
Index: llvm/lib/Target/AMDGPU/SISchedule.td
===================================================================
--- llvm/lib/Target/AMDGPU/SISchedule.td
+++ llvm/lib/Target/AMDGPU/SISchedule.td
@@ -29,8 +29,8 @@
// Vector ALU instructions
def Write32Bit : SchedWrite;
+def WriteFloatCvt : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
-def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
@@ -127,6 +127,7 @@
def : HWVALUWriteRes<Write32Bit, 1>;
def : HWVALUWriteRes<Write64Bit, 2>;
+ def : HWVALUWriteRes<WriteFloatCvt, 4>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
def : HWVALUWriteRes<Write2PassMAI, 2>;
def : HWVALUWriteRes<Write8PassMAI, 8>;
@@ -186,6 +187,7 @@
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D75392.247385.patch
Type: text/x-patch
Size: 2341 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200228/2183e80c/attachment.bin>
More information about the llvm-commits
mailing list