[llvm] 59cf9dd - [AMDGPU][GISel] Enable Selection of ADD3 for G_PTR_ADD
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 24 07:44:29 PDT 2022
Author: Pierre van Houtryve
Date: 2022-08-24T14:44:19Z
New Revision: 59cf9dd92337214d15d4b04f299832a328de195b
URL: https://github.com/llvm/llvm-project/commit/59cf9dd92337214d15d4b04f299832a328de195b
DIFF: https://github.com/llvm/llvm-project/commit/59cf9dd92337214d15d4b04f299832a328de195b.diff
LOG: [AMDGPU][GISel] Enable Selection of ADD3 for G_PTR_ADD
Allows things like `(G_PTR_ADD (G_PTR_ADD a, b), c)` to be
simplified into a single ADD3 instruction instead of two adds.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D131254
Added:
Modified:
llvm/include/llvm/Target/GenericOpcodes.td
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
llvm/utils/TableGen/CodeGenInstruction.cpp
llvm/utils/TableGen/CodeGenInstruction.h
llvm/utils/TableGen/GlobalISelEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 5652e60d081ca..5cd7b7bf78a71 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -411,8 +411,8 @@ def G_SELECT : GenericInstruction {
// Generic pointer offset.
def G_PTR_ADD : GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
+ let OutOperandList = (outs ptype0:$dst);
+ let InOperandList = (ins ptype0:$src1, type1:$src2);
let hasSideEffects = false;
}
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 5b8b852962f48..7ee4c73a55f77 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -55,6 +55,7 @@ def : GINodeEquiv<G_FCONSTANT, fpimm>;
def : GINodeEquiv<G_IMPLICIT_DEF, undef>;
def : GINodeEquiv<G_FRAME_INDEX, frameindex>;
def : GINodeEquiv<G_BLOCK_ADDR, blockaddress>;
+def : GINodeEquiv<G_PTR_ADD, ptradd>;
def : GINodeEquiv<G_ADD, add>;
def : GINodeEquiv<G_SUB, sub>;
def : GINodeEquiv<G_MUL, mul>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index e882dd707a7f6..378cb9cd3631f 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -109,6 +109,9 @@ def SDTOther : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'.
def SDTUnaryOp : SDTypeProfile<1, 1, []>; // for bitconvert.
+def SDTPtrAddOp : SDTypeProfile<1, 2, [ // ptradd
+ SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1>
+]>;
def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>
]>;
@@ -373,6 +376,7 @@ def tblockaddress: SDNode<"ISD::TargetBlockAddress", SDTPtrLeaf, [],
def add : SDNode<"ISD::ADD" , SDTIntBinOp ,
[SDNPCommutative, SDNPAssociative]>;
+def ptradd : SDNode<"ISD::ADD" , SDTPtrAddOp, []>;
def sub : SDNode<"ISD::SUB" , SDTIntBinOp>;
def mul : SDNode<"ISD::MUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f2e5c2fe00e89..c1468bd71c8a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3485,6 +3485,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
return selectG_BUILD_VECTOR_TRUNC(I);
case TargetOpcode::G_PTR_ADD:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
return selectG_PTR_ADD(I);
case TargetOpcode::G_IMPLICIT_DEF:
return selectG_IMPLICIT_DEF(I);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5dc25712c2a42..59955cb50d012 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -596,6 +596,7 @@ class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instructio
def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
+def : ThreeOp_i32_Pats<ptradd, ptradd, V_ADD3_U32_e64>;
def : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>;
def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
index 97b2a150ba353..3ed8e5ae144c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
@@ -137,9 +137,8 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]]
+ ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]]
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
@@ -174,9 +173,8 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]]
+ ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index a0438d5723c6a..98b5ea06ce4ce 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -33,8 +33,7 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -63,14 +62,13 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX11-GISEL-LABEL: soff1_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -117,8 +115,7 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -149,15 +146,14 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX11-GISEL-LABEL: soff1_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -204,8 +200,7 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -235,15 +230,14 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX11-GISEL-LABEL: soff1_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -291,8 +285,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -323,15 +316,15 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX11-GISEL-LABEL: soff2_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -380,8 +373,7 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -413,16 +405,15 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX11-GISEL-LABEL: soff2_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -471,8 +462,7 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -504,16 +494,15 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX11-GISEL-LABEL: soff2_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -561,8 +550,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -594,15 +582,15 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX11-GISEL-LABEL: soff4_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -651,8 +639,7 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -686,16 +673,15 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX11-GISEL-LABEL: soff4_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -744,8 +730,7 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -777,16 +762,15 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX11-GISEL-LABEL: soff4_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp
index ba12633ace8cf..1dbd8ae9c2a27 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -511,9 +511,9 @@ FlattenAsmStringVariants(StringRef Cur, unsigned Variant) {
return Res;
}
-bool CodeGenInstruction::isOperandImpl(unsigned i,
+bool CodeGenInstruction::isOperandImpl(StringRef OpListName, unsigned i,
StringRef PropertyName) const {
- DagInit *ConstraintList = TheDef->getValueAsDag("InOperandList");
+ DagInit *ConstraintList = TheDef->getValueAsDag(OpListName);
if (!ConstraintList || i >= ConstraintList->getNumArgs())
return false;
diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index d3de6d95780cf..067ea3b3f4e70 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -313,17 +313,22 @@ template <typename T> class ArrayRef;
// This can be used on intructions that use typeN or ptypeN to identify
// operands that should be considered as pointers even though SelectionDAG
// didn't make a distinction between integer and pointers.
- bool isOperandAPointer(unsigned i) const {
- return isOperandImpl(i, "IsPointer");
+ bool isInOperandAPointer(unsigned i) const {
+ return isOperandImpl("InOperandList", i, "IsPointer");
+ }
+
+ bool isOutOperandAPointer(unsigned i) const {
+ return isOperandImpl("OutOperandList", i, "IsPointer");
}
/// Check if the operand is required to be an immediate.
- bool isOperandImmArg(unsigned i) const {
- return isOperandImpl(i, "IsImmediate");
+ bool isInOperandImmArg(unsigned i) const {
+ return isOperandImpl("InOperandList", i, "IsImmediate");
}
private:
- bool isOperandImpl(unsigned i, StringRef PropertyName) const;
+ bool isOperandImpl(StringRef OpListName, unsigned i,
+ StringRef PropertyName) const;
};
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 31c3156ba01d9..e49f2da7d27c1 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -4007,8 +4007,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
for (const TypeSetByHwMode &VTy : Src->getExtTypes()) {
// Results don't have a name unless they are the root node. The caller will
// set the name if appropriate.
+ const bool OperandIsAPointer =
+ SrcGIOrNull && SrcGIOrNull->isOutOperandAPointer(OpIdx);
OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx);
- if (auto Error = OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */))
+ if (auto Error = OM.addTypeCheckPredicate(VTy, OperandIsAPointer))
return failedImport(toString(std::move(Error)) +
" for result of Src pattern operator");
}
@@ -4156,13 +4158,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
// argument that is required to be an immediate, we should not emit an LLT
// type check, and should not be looking for a G_CONSTANT defined
// register.
- bool OperandIsImmArg = SrcGIOrNull->isOperandImmArg(i);
+ bool OperandIsImmArg = SrcGIOrNull->isInOperandImmArg(i);
// SelectionDAG allows pointers to be represented with iN since it doesn't
// distinguish between pointers and integers but they are different types in GlobalISel.
// Coerce integers to pointers to address space 0 if the context indicates a pointer.
//
- bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i);
+ bool OperandIsAPointer = SrcGIOrNull->isInOperandAPointer(i);
if (IsIntrinsic) {
// For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately
More information about the llvm-commits
mailing list