[PATCH] D33783: [AMDGPU] Force qsads instrs to use different dest register than source registers

Mark Searles via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 08:56:48 PDT 2017


msearles created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl, arsenm.

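The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 instructions take more than one pass in hardware. For these three instructions, the destination registers must be different from all source registers so that the first pass does not overwrite source values that the following passes still need to read. This patch enforces that by marking $vdst as @earlyclobber on those instruction definitions.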


https://reviews.llvm.org/D33783

Files:
  lib/Target/AMDGPU/VOP3Instructions.td
  test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
  test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
  test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,15 +4,15 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_qsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
   %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
   store i64 %result, i64 addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_qsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
   %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
   store i64 %result, i64 addrspace(1)* %out, align 4
Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -4,15 +4,15 @@
 declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) {
   %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> <i32 100, i32 100, i32 100, i32 100>) #0
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {
   %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,15 +4,15 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_mqsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
   %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
   store i64 %result, i64 addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-NOT: v_mqsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v{{\[}}[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
   %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
   store i64 %result, i64 addrspace(1)* %out, align 4
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -209,7 +209,10 @@
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
 def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
   let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@
 
 let SubtargetPredicate = isCIVI in {
 
+let Constraints = "@earlyclobber $vdst" in {
 def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
 def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 let isCommutable = 1 in {
 def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33783.101032.patch
Type: text/x-patch
Size: 5393 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170601/59d8242c/attachment.bin>


More information about the llvm-commits mailing list