[llvm] r364804 - AMDGPU/GlobalISel: Try to select VOP3 form of add
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 09:27:33 PDT 2019
Author: arsenm
Date: Mon Jul 1 09:27:32 2019
New Revision: 364804
URL: http://llvm.org/viewvc/llvm-project?rev=364804&view=rev
Log:
AMDGPU/GlobalISel: Try to select VOP3 form of add
There are several things broken, but at least emit the right thing for
gfx9.
The import of the pattern with the unused carry out seems to not
work. Needs a special class for clamp, because OperandWithDefaultOps
doesn't really work.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=364804&r1=364803&r2=364804&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Mon Jul 1 09:27:32 2019
@@ -70,6 +70,17 @@ class GISelVop2Pat <
(inst src0_vt:$src0, src1_vt:$src1)
>;
+// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken.
+class GISelVop2ClampingPat <
+ SDPatternOperator node,
+ Instruction inst,
+ ValueType dst_vt,
+ ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
+
+ (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
+ (inst src0_vt:$src0, src1_vt:$src1, 0)
+>;
+
class GISelVop2CommutePat <
SDPatternOperator node,
Instruction inst,
@@ -129,7 +140,16 @@ def : GISelSop2Pat <or, S_OR_B32, i32>;
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
def : GISelSop2Pat <add, S_ADD_I32, i32>;
+
+let SubtargetPredicate = NotHasAddNoCarryInsts in {
+// FIXME: This should use the VOP3 form
+//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
+}
def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
let AddedComplexity = 100 in {
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir?rev=364804&r1=364803&r2=364804&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir Mon Jul 1 09:27:32 2019
@@ -1,24 +1,37 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
-name: add_i32
+name: add_s32
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
- ; GCN-LABEL: name: add_i32
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
- ; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
- ; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ ; GFX6-LABEL: name: add_s32
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
+ ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
+ ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
+ ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX9-LABEL: name: add_s32
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
+ ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:vgpr(s32) = COPY $vgpr0
More information about the llvm-commits
mailing list