[PATCH] D102211: [AMDGPU] Do not clause NSA instructions

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon May 10 20:20:27 PDT 2021


critson updated this revision to Diff 344280.
critson added a comment.

- Rebase on pre-commit tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102211/new/

https://reviews.llvm.org/D102211

Files:
  llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
  llvm/test/CodeGen/AMDGPU/cluster_stores.ll
  llvm/test/CodeGen/AMDGPU/hard-clauses.mir


Index: llvm/test/CodeGen/AMDGPU/hard-clauses.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -218,11 +218,8 @@
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
     ; CHECK-LABEL: name: mimg_nsa
     ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
-    ; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
-    ; CHECK:   S_CLAUSE 1
-    ; CHECK:   $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
-    ; CHECK:   $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
-    ; CHECK: }
+    ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+    ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
     $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec  :: (load 16)
     $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec  :: (load 16)
 ...
@@ -235,12 +232,9 @@
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
     ; CHECK-LABEL: name: mimg_nsa_mixed
     ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
-    ; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 {
-    ; CHECK:   S_CLAUSE 2
-    ; CHECK:   $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
-    ; CHECK:   $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
-    ; CHECK:   $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
-    ; CHECK: }
+    ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+    ; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+    ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
     $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec  :: (load 16)
     $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
     $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec  :: (load 16)
Index: llvm/test/CodeGen/AMDGPU/cluster_stores.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -359,7 +359,6 @@
 ; GFX10-NEXT:    v_add_f32_e32 v12, 1.0, v3
 ; GFX10-NEXT:    v_add_f32_e32 v14, 2.0, v2
 ; GFX10-NEXT:    v_add_f32_e32 v15, 2.0, v3
-; GFX10-NEXT:    s_clause 0x1
 ; GFX10-NEXT:    image_sample_d v[2:5], [v11, v12, v13, v13, v13, v13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT:    image_sample_d v[6:9], [v14, v15, v10, v10, v10, v10], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
Index: llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -76,6 +76,13 @@
   }
 
   HardClauseType getHardClauseType(const MachineInstr &MI) {
+    // Do not clause NSA instructions on GFX10.
+    if (AMDGPU::isGFX10(*ST) && ST->hasFeature(AMDGPU::FeatureNSAEncoding)) {
+      const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+      if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
+        return HARDCLAUSE_ILLEGAL;
+    }
+
     // On current architectures we only get a benefit from clausing loads.
     if (MI.mayLoad()) {
       if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102211.344280.patch
Type: text/x-patch
Size: 9309 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210511/3befdc71/attachment.bin>


More information about the llvm-commits mailing list