[llvm] r364804 - AMDGPU/GlobalISel: Try to select VOP3 form of add

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 09:27:33 PDT 2019


Author: arsenm
Date: Mon Jul  1 09:27:32 2019
New Revision: 364804

URL: http://llvm.org/viewvc/llvm-project?rev=364804&view=rev
Log:
AMDGPU/GlobalISel: Try to select VOP3 form of add

There are several things broken, but at least emit the right thing for
gfx9.

The import of the pattern with the unused carry out seems to not
work. Needs a special class for clamp, because OperandWithDefaultOps
doesn't really work.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=364804&r1=364803&r2=364804&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Mon Jul  1 09:27:32 2019
@@ -70,6 +70,17 @@ class GISelVop2Pat <
   (inst src0_vt:$src0, src1_vt:$src1)
 >;
 
+// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken.
+class GISelVop2ClampingPat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
+  (inst src0_vt:$src0, src1_vt:$src1, 0)
+>;
+
 class GISelVop2CommutePat <
   SDPatternOperator node,
   Instruction inst,
@@ -129,7 +140,16 @@ def : GISelSop2Pat <or, S_OR_B32, i32>;
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
 def : GISelSop2Pat <add, S_ADD_I32, i32>;
+
+let SubtargetPredicate = NotHasAddNoCarryInsts in {
+// FIXME: This should use the VOP3 form
+//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
 def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
+}
 
 def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
 let AddedComplexity = 100 in {

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir?rev=364804&r1=364803&r2=364804&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir Mon Jul  1 09:27:32 2019
@@ -1,24 +1,37 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX9 %s
 
 ---
-name:            add_i32
+name:            add_s32
 legalized:       true
 regBankSelected: true
 
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
-    ; GCN-LABEL: name: add_i32
-    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
-    ; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
-    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
-    ; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
-    ; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+    ; GFX6-LABEL: name: add_s32
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
+    ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
+    ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
+    ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: add_s32
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
     %2:vgpr(s32) = COPY $vgpr0




More information about the llvm-commits mailing list