[llvm] 8f3b2c8 - AMDGPU/GlobalISel: Remove selection of MAD/MAC when not available

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 18 18:31:37 PDT 2020


Author: Carl Ritson
Date: 2020-06-19T10:30:19+09:00
New Revision: 8f3b2c8aa3175628128a32a6bcaecc67efd03514

URL: https://github.com/llvm/llvm-project/commit/8f3b2c8aa3175628128a32a6bcaecc67efd03514
DIFF: https://github.com/llvm/llvm-project/commit/8f3b2c8aa3175628128a32a6bcaecc67efd03514.diff

LOG: AMDGPU/GlobalISel: Remove selection of MAD/MAC when not available

Add code to respect mad-mac-f32-insts target feature.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D81990

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0192df8e540b..0e6a444b2751 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -612,10 +612,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
 
   // Whether this is legal depends on the floating point mode for the function.
   auto &FMad = getActionDefinitionsBuilder(G_FMAD);
-  if (ST.hasMadF16())
+  if (ST.hasMadF16() && ST.hasMadMacF32Insts())
     FMad.customFor({S32, S16});
-  else
+  else if (ST.hasMadMacF32Insts())
     FMad.customFor({S32});
+  else if (ST.hasMadF16())
+    FMad.customFor({S16});
   FMad.scalarize(0)
       .lower();
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
index 95515f3593ac..4a72033af517 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
@@ -1,7 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX101 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s
 
 ---
 name: test_fmad_s32_flush
@@ -26,12 +27,19 @@ body: |
     ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
     ; GFX7: $vgpr0 = COPY [[FMAD]](s32)
-    ; GFX10-LABEL: name: test_fmad_s32_flush
-    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
-    ; GFX10: $vgpr0 = COPY [[FMAD]](s32)
+    ; GFX101-LABEL: name: test_fmad_s32_flush
+    ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+    ; GFX101: $vgpr0 = COPY [[FMAD]](s32)
+    ; GFX103-LABEL: name: test_fmad_s32_flush
+    ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+    ; GFX103: $vgpr0 = COPY [[FADD]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -62,12 +70,19 @@ body: |
     ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
     ; GFX7: $vgpr0 = COPY [[FMAD]](s32)
-    ; GFX10-LABEL: name: test_fmad_s32_flags_flush
-    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
-    ; GFX10: $vgpr0 = COPY [[FMAD]](s32)
+    ; GFX101-LABEL: name: test_fmad_s32_flags_flush
+    ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+    ; GFX101: $vgpr0 = COPY [[FMAD]](s32)
+    ; GFX103-LABEL: name: test_fmad_s32_flags_flush
+    ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+    ; GFX103: $vgpr0 = COPY [[FADD]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -108,17 +123,30 @@ body: |
     ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
     ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v2s32_flush
-    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
-    ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
-    ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v2s32_flush
+    ; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
+    ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
+    ; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v2s32_flush
+    ; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+    ; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -161,18 +189,33 @@ body: |
     ; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
     ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v3s32_flush
-    ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
-    ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
-    ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
-    ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v3s32_flush
+    ; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
+    ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
+    ; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
+    ; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v3s32_flush
+    ; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+    ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+    ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+    ; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
@@ -217,19 +260,36 @@ body: |
     ; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
     ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v4s32_flush
-    ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
-    ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
-    ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
-    ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
-    ; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v4s32_flush
+    ; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
+    ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
+    ; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
+    ; GFX101: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
+    ; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v4s32_flush
+    ; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+    ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+    ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+    ; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+    ; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+    ; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
@@ -262,13 +322,20 @@ body: |
     ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
     ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
     ; GFX7: $vgpr0 = COPY [[FADD]](s32)
-    ; GFX10-LABEL: name: test_fmad_s32_denorm
-    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
-    ; GFX10: $vgpr0 = COPY [[FADD]](s32)
+    ; GFX101-LABEL: name: test_fmad_s32_denorm
+    ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+    ; GFX101: $vgpr0 = COPY [[FADD]](s32)
+    ; GFX103-LABEL: name: test_fmad_s32_denorm
+    ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+    ; GFX103: $vgpr0 = COPY [[FADD]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -301,13 +368,20 @@ body: |
     ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
     ; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
     ; GFX7: $vgpr0 = COPY [[FADD]](s32)
-    ; GFX10-LABEL: name: test_fmad_s32_flags_denorm
-    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
-    ; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
-    ; GFX10: $vgpr0 = COPY [[FADD]](s32)
+    ; GFX101-LABEL: name: test_fmad_s32_flags_denorm
+    ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+    ; GFX101: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+    ; GFX101: $vgpr0 = COPY [[FADD]](s32)
+    ; GFX103-LABEL: name: test_fmad_s32_flags_denorm
+    ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+    ; GFX103: $vgpr0 = COPY [[FADD]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -352,19 +426,32 @@ body: |
     ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
     ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v2s32_denorm
-    ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
-    ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
-    ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
-    ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
-    ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v2s32_denorm
+    ; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+    ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+    ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+    ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+    ; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v2s32_denorm
+    ; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+    ; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -413,21 +500,36 @@ body: |
     ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
     ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v3s32_denorm
-    ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
-    ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
-    ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
-    ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
-    ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
-    ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
-    ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v3s32_denorm
+    ; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+    ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+    ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+    ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+    ; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+    ; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+    ; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v3s32_denorm
+    ; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+    ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+    ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+    ; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
@@ -480,23 +582,40 @@ body: |
     ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
     ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
     ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-LABEL: name: test_fmad_v4s32_denorm
-    ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
-    ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
-    ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
-    ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
-    ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
-    ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
-    ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
-    ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
-    ; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX101-LABEL: name: test_fmad_v4s32_denorm
+    ; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+    ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+    ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+    ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+    ; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+    ; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+    ; GFX101: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+    ; GFX101: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+    ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+    ; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX103-LABEL: name: test_fmad_v4s32_denorm
+    ; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+    ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+    ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+    ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+    ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+    ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+    ; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+    ; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+    ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+    ; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index b2e09226bc48..219bcce04da1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
 
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
 
@@ -3147,7 +3147,7 @@ define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT:    v_rcp_f32_e32 v2, v1
 ; CGP-NEXT:    v_mul_f32_e32 v2, v0, v2
 ; CGP-NEXT:    v_trunc_f32_e32 v2, v2
-; CGP-NEXT:    v_fma_f32 v0, -v2, v1, v0
+; CGP-NEXT:    v_mad_f32 v0, -v2, v1, v0
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3444,9 +3444,9 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_f32_e32 v6, v2, v6
 ; CGP-NEXT:    v_trunc_f32_e32 v5, v5
 ; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_fma_f32 v0, -v5, v3, v0
+; CGP-NEXT:    v_mad_f32 v0, -v5, v3, v0
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_fma_f32 v2, -v6, v4, v2
+; CGP-NEXT:    v_mad_f32 v2, -v6, v4, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index ca5886b18b3d..3e43bcf0409c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
 
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
 
@@ -3116,7 +3116,7 @@ define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT:    v_rcp_f32_e32 v4, v3
 ; CGP-NEXT:    v_mul_f32_e32 v4, v2, v4
 ; CGP-NEXT:    v_trunc_f32_e32 v4, v4
-; CGP-NEXT:    v_fma_f32 v2, -v4, v3, v2
+; CGP-NEXT:    v_mad_f32 v2, -v4, v3, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
@@ -3411,9 +3411,9 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_f32_e32 v10, v7, v10
 ; CGP-NEXT:    v_trunc_f32_e32 v9, v9
 ; CGP-NEXT:    v_trunc_f32_e32 v10, v10
-; CGP-NEXT:    v_fma_f32 v5, -v9, v6, v5
+; CGP-NEXT:    v_mad_f32 v5, -v9, v6, v5
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
-; CGP-NEXT:    v_fma_f32 v7, -v10, v8, v7
+; CGP-NEXT:    v_mad_f32 v7, -v10, v8, v7
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
 ; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v5|, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]


        


More information about the llvm-commits mailing list