[llvm] 38fb446 - AMDGPU/GlobalISel: Fix test failure in release build

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 6 08:01:25 PDT 2020


Author: Matt Arsenault
Date: 2020-06-06T11:01:18-04:00
New Revision: 38fb446fc7fc7683d230f93ade61d5d41c3587bd

URL: https://github.com/llvm/llvm-project/commit/38fb446fc7fc7683d230f93ade61d5d41c3587bd
DIFF: https://github.com/llvm/llvm-project/commit/38fb446fc7fc7683d230f93ade61d5d41c3587bd.diff

LOG: AMDGPU/GlobalISel: Fix test failure in release build

The annoying behavior where the output differs because of the
legality check (which only runs in asserts-enabled builds) struck again.
In addition, the subtarget predicate wasn't correctly set for DS FP atomics.

Some of the FP min/max instructions appear to be in the gfx6/gfx7
manuals, but IIRC this might be one of the cases where the manual got
ahead of the actual hardware support. I've left these as-is for now,
since the assembler tests seem to expect them.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/DSInstructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5ed8dc6a8015..cb03aeabe17c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1048,6 +1048,9 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
 def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
   AssemblerPredicate<(all_of FeatureGFX9Insts)>;
 
+def HasLDSFPAtomics : Predicate<"Subtarget->hasLDSFPAtomics()">,
+  AssemblerPredicate<(all_of FeatureGFX8Insts)>;
+
 def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
   AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;
 

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 7c2162541934..545c225369da 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -388,7 +388,12 @@ defm DS_MAX_U32       : DS_1A1D_NORET_mc<"ds_max_u32">;
 defm DS_AND_B32       : DS_1A1D_NORET_mc<"ds_and_b32">;
 defm DS_OR_B32        : DS_1A1D_NORET_mc<"ds_or_b32">;
 defm DS_XOR_B32       : DS_1A1D_NORET_mc<"ds_xor_b32">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
 defm DS_ADD_F32       : DS_1A1D_NORET_mc<"ds_add_f32">;
+}
+
+// FIXME: Are these really present pre-gfx8?
 defm DS_MIN_F32       : DS_1A1D_NORET_mc<"ds_min_f32">;
 defm DS_MAX_F32       : DS_1A1D_NORET_mc<"ds_max_f32">;
 
@@ -443,7 +448,10 @@ defm DS_MIN_F64       : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
 defm DS_MAX_F64       : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
 
 defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
 defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;
+}
 defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
 defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
 defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
@@ -609,10 +617,12 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
                                        int_amdgcn_ds_bpermute>;
 }
 
-def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
-
 } // let SubtargetPredicate = isGFX8Plus
 
+let SubtargetPredicate = HasLDSFPAtomics in {
+def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
+}
+
 //===----------------------------------------------------------------------===//
 // DS Patterns
 //===----------------------------------------------------------------------===//
@@ -830,9 +840,12 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
 defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
 defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
 defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
 defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
 defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
 defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
+}
 
 // 64-bit atomics.
 defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
index 5e4f6e0842f0..87bc4be1e1d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
@@ -1,10 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
 
+# GFX6/7 selection should fail.
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
 
 ---
 name:            atomicrmw_fadd_s32_local
@@ -15,12 +16,6 @@ body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1
 
-    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local
-    ; GFX6: liveins: $vgpr0, $vgpr1
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
-    ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
     ; GFX8-LABEL: name: atomicrmw_fadd_s32_local
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -34,6 +29,13 @@ body:             |
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
+    ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3)
@@ -50,11 +52,6 @@ body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1
 
-    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
-    ; GFX6: liveins: $vgpr0, $vgpr1
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
     ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -66,6 +63,12 @@ body:             |
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
+    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3)
@@ -81,14 +84,6 @@ body:             |
   bb.0:
     liveins: $vgpr0, $vgpr1
 
-    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
-    ; GFX6: liveins: $vgpr0, $vgpr1
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
-    ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
     ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -102,6 +97,15 @@ body:             |
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+    ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
+    ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
+    ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = G_CONSTANT i32 4


        


More information about the llvm-commits mailing list