[PATCH] D63988: AMDGPU/GlobalISel: Try to select VOP3 form of add

Matt Arsenault via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 05:37:56 PDT 2019


arsenm created this revision.
arsenm added reviewers: tstellar, nhaehnle.
Herald added subscribers: Petar.Avramovic, dexonsmith, t-tye, tpr, dstuttard, kristof.beyls, rovka, mehdi_amini, yaxunl, wdng, jvesely, kzhuravl.

Several things are still broken, but this at least emits the right
instruction for gfx9.

      

Importing the pattern with the unused carry-out does not seem to
work. This needs a special pattern class for the clamp operand, because
OperandWithDefaultOps doesn't really work.


https://reviews.llvm.org/D63988

Files:
  lib/Target/AMDGPU/AMDGPUGISel.td
  test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir


Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX9 %s
+
+---
+name:            add_s32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
+
+    ; GFX6-LABEL: name: add_s32
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
+    ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
+    ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
+    ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: add_s32
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:vgpr(s32) = COPY $vgpr0
+    %3:vgpr(p1) = COPY $vgpr3_vgpr4
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_CONSTANT i32 4096
+
+    ; add ss
+    %6:sgpr(s32) = G_ADD %0, %1
+
+    ; add vs
+    %7:vgpr(s32) = G_ADD %2, %6
+
+    ; add sv
+    %8:vgpr(s32) = G_ADD %6, %7
+
+    ; add vv
+    %9:vgpr(s32) = G_ADD %8, %2
+
+    G_STORE %9, %3 :: (store 4, addrspace 1)
+
+...
Index: lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUGISel.td
+++ lib/Target/AMDGPU/AMDGPUGISel.td
@@ -70,6 +70,17 @@
   (inst src0_vt:$src0, src1_vt:$src1)
 >;
 
+// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken.
+class GISelVop2ClampingPat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
+  (inst src0_vt:$src0, src1_vt:$src1, 0)
+>;
+
 class GISelVop2CommutePat <
   SDPatternOperator node,
   Instruction inst,
@@ -129,7 +140,16 @@
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
 def : GISelSop2Pat <add, S_ADD_I32, i32>;
+
+let SubtargetPredicate = NotHasAddNoCarryInsts in {
+// FIXME: This should use the VOP3 form
+//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
 def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
+}
 
 def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
 let AddedComplexity = 100 in {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D63988.207265.patch
Type: text/x-patch
Size: 4164 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190701/10b36c93/attachment.bin>


More information about the llvm-commits mailing list