[llvm] 59cf9dd - [AMDGPU][GISel] Enable Selection of ADD3 for G_PTR_ADD

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 24 07:44:29 PDT 2022


Author: Pierre van Houtryve
Date: 2022-08-24T14:44:19Z
New Revision: 59cf9dd92337214d15d4b04f299832a328de195b

URL: https://github.com/llvm/llvm-project/commit/59cf9dd92337214d15d4b04f299832a328de195b
DIFF: https://github.com/llvm/llvm-project/commit/59cf9dd92337214d15d4b04f299832a328de195b.diff

LOG: [AMDGPU][GISel] Enable Selection of ADD3 for G_PTR_ADD

Allows things like `(G_PTR_ADD (G_PTR_ADD a, b), c)` to be
simplified into a single ADD3 instruction instead of two adds.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D131254

Added: 
    

Modified: 
    llvm/include/llvm/Target/GenericOpcodes.td
    llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
    llvm/include/llvm/Target/TargetSelectionDAG.td
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
    llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
    llvm/utils/TableGen/CodeGenInstruction.cpp
    llvm/utils/TableGen/CodeGenInstruction.h
    llvm/utils/TableGen/GlobalISelEmitter.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 5652e60d081ca..5cd7b7bf78a71 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -411,8 +411,8 @@ def G_SELECT : GenericInstruction {
 
 // Generic pointer offset.
 def G_PTR_ADD : GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src1, type1:$src2);
+  let OutOperandList = (outs ptype0:$dst);
+  let InOperandList = (ins ptype0:$src1, type1:$src2);
   let hasSideEffects = false;
 }
 

diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 5b8b852962f48..7ee4c73a55f77 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -55,6 +55,7 @@ def : GINodeEquiv<G_FCONSTANT, fpimm>;
 def : GINodeEquiv<G_IMPLICIT_DEF, undef>;
 def : GINodeEquiv<G_FRAME_INDEX, frameindex>;
 def : GINodeEquiv<G_BLOCK_ADDR, blockaddress>;
+def : GINodeEquiv<G_PTR_ADD, ptradd>;
 def : GINodeEquiv<G_ADD, add>;
 def : GINodeEquiv<G_SUB, sub>;
 def : GINodeEquiv<G_MUL, mul>;

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index e882dd707a7f6..378cb9cd3631f 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -109,6 +109,9 @@ def SDTOther  : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
 def SDTUNDEF  : SDTypeProfile<1, 0, []>;                     // for 'undef'.
 def SDTUnaryOp  : SDTypeProfile<1, 1, []>;                   // for bitconvert.
 
+def SDTPtrAddOp : SDTypeProfile<1, 2, [     // ptradd
+  SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1>
+]>;
 def SDTIntBinOp : SDTypeProfile<1, 2, [     // add, and, or, xor, udiv, etc.
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>
 ]>;
@@ -373,6 +376,7 @@ def tblockaddress: SDNode<"ISD::TargetBlockAddress",  SDTPtrLeaf, [],
 
 def add        : SDNode<"ISD::ADD"       , SDTIntBinOp   ,
                         [SDNPCommutative, SDNPAssociative]>;
+def ptradd     : SDNode<"ISD::ADD"       , SDTPtrAddOp, []>;
 def sub        : SDNode<"ISD::SUB"       , SDTIntBinOp>;
 def mul        : SDNode<"ISD::MUL"       , SDTIntBinOp,
                         [SDNPCommutative, SDNPAssociative]>;

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f2e5c2fe00e89..c1468bd71c8a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3485,6 +3485,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
     return selectG_BUILD_VECTOR_TRUNC(I);
   case TargetOpcode::G_PTR_ADD:
+    if (selectImpl(I, *CoverageInfo))
+      return true;
     return selectG_PTR_ADD(I);
   case TargetOpcode::G_IMPLICIT_DEF:
     return selectG_IMPLICIT_DEF(I);

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5dc25712c2a42..59955cb50d012 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -596,6 +596,7 @@ class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instructio
 def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
 def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
 def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
+def : ThreeOp_i32_Pats<ptradd, ptradd, V_ADD3_U32_e64>;
 def : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>;
 def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
 def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
index 97b2a150ba353..3ed8e5ae144c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir
@@ -137,9 +137,8 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]]
+    ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -174,9 +173,8 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]]
+    ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]]
     %0:vgpr(p5) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2

diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index a0438d5723c6a..98b5ea06ce4ce 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -33,8 +33,7 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -63,14 +62,13 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff1_voff1:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -117,8 +115,7 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -149,15 +146,14 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff1_voff2:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -204,8 +200,7 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -235,15 +230,14 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff1_voff4:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -291,8 +285,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -323,15 +316,15 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff2_voff1:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -380,8 +373,7 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -413,16 +405,15 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff2_voff2:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -471,8 +462,7 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -504,16 +494,15 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff2_voff4:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -561,8 +550,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -594,15 +582,15 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff4_voff1:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -651,8 +639,7 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -686,16 +673,15 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff4_voff2:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -744,8 +730,7 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_add3_u32 v0, v1, s0, v0
 ; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
 ; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
 ; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
@@ -777,16 +762,15 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
 ; GFX11-GISEL-LABEL: soff4_voff4:
 ; GFX11-GISEL:       ; %bb.0: ; %bb
 ; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
-; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT:    v_add3_u32 v0, 4, s0, v0
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
+; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0

diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp
index ba12633ace8cf..1dbd8ae9c2a27 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -511,9 +511,9 @@ FlattenAsmStringVariants(StringRef Cur, unsigned Variant) {
   return Res;
 }
 
-bool CodeGenInstruction::isOperandImpl(unsigned i,
+bool CodeGenInstruction::isOperandImpl(StringRef OpListName, unsigned i,
                                        StringRef PropertyName) const {
-  DagInit *ConstraintList = TheDef->getValueAsDag("InOperandList");
+  DagInit *ConstraintList = TheDef->getValueAsDag(OpListName);
   if (!ConstraintList || i >= ConstraintList->getNumArgs())
     return false;
 

diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index d3de6d95780cf..067ea3b3f4e70 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -313,17 +313,22 @@ template <typename T> class ArrayRef;
     // This can be used on intructions that use typeN or ptypeN to identify
     // operands that should be considered as pointers even though SelectionDAG
     // didn't make a distinction between integer and pointers.
-    bool isOperandAPointer(unsigned i) const {
-      return isOperandImpl(i, "IsPointer");
+    bool isInOperandAPointer(unsigned i) const {
+      return isOperandImpl("InOperandList", i, "IsPointer");
+    }
+
+    bool isOutOperandAPointer(unsigned i) const {
+      return isOperandImpl("OutOperandList", i, "IsPointer");
     }
 
     /// Check if the operand is required to be an immediate.
-    bool isOperandImmArg(unsigned i) const {
-      return isOperandImpl(i, "IsImmediate");
+    bool isInOperandImmArg(unsigned i) const {
+      return isOperandImpl("InOperandList", i, "IsImmediate");
     }
 
   private:
-    bool isOperandImpl(unsigned i, StringRef PropertyName) const;
+    bool isOperandImpl(StringRef OpListName, unsigned i,
+                       StringRef PropertyName) const;
   };
 
 

diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 31c3156ba01d9..e49f2da7d27c1 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -4007,8 +4007,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
   for (const TypeSetByHwMode &VTy : Src->getExtTypes()) {
     // Results don't have a name unless they are the root node. The caller will
     // set the name if appropriate.
+    const bool OperandIsAPointer =
+        SrcGIOrNull && SrcGIOrNull->isOutOperandAPointer(OpIdx);
     OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx);
-    if (auto Error = OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */))
+    if (auto Error = OM.addTypeCheckPredicate(VTy, OperandIsAPointer))
       return failedImport(toString(std::move(Error)) +
                           " for result of Src pattern operator");
   }
@@ -4156,13 +4158,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       // argument that is required to be an immediate, we should not emit an LLT
       // type check, and should not be looking for a G_CONSTANT defined
       // register.
-      bool OperandIsImmArg = SrcGIOrNull->isOperandImmArg(i);
+      bool OperandIsImmArg = SrcGIOrNull->isInOperandImmArg(i);
 
       // SelectionDAG allows pointers to be represented with iN since it doesn't
       // distinguish between pointers and integers but they are different types in GlobalISel.
       // Coerce integers to pointers to address space 0 if the context indicates a pointer.
       //
-      bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i);
+      bool OperandIsAPointer = SrcGIOrNull->isInOperandAPointer(i);
 
       if (IsIntrinsic) {
         // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately


        


More information about the llvm-commits mailing list