[llvm] 892ef2e - [AMDGPU] More codegen patterns for v2i16/v2f16 build_vector
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 02:41:49 PDT 2020
Author: Jay Foad
Date: 2020-09-22T10:41:38+01:00
New Revision: 892ef2e3c0b60656a95d0d9e9f458b73238b21b7
URL: https://github.com/llvm/llvm-project/commit/892ef2e3c0b60656a95d0d9e9f458b73238b21b7
DIFF: https://github.com/llvm/llvm-project/commit/892ef2e3c0b60656a95d0d9e9f458b73238b21b7.diff
LOG: [AMDGPU] More codegen patterns for v2i16/v2f16 build_vector
It's simpler to handle these build_vector cases with instruction selection
patterns at codegen time than to do ad-hoc constant folding of machine
instructions in SIFoldOperands.
Differential Revision: https://reviews.llvm.org/D88028
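For context, here is a minimal IR sketch of the kind of build_vector the new
patterns match (illustrative only, not taken from the commit's tests; the
function name is made up and an amdgcn gfx9 target is assumed): a <2 x i16>
whose high element is the constant zero. With the new patterns this can be
selected directly to a single masked AND, instead of leaving a
v_lshl_or_b32/v_and_or_b32 with a zero operand for SIFoldOperands to clean up.

define <2 x i16> @build_v2i16_zero_hi(i16 %lo) {
  ; element 0 = %lo, element 1 = 0 -> matches the new (build_vector $src, 0) patterns
  %v0 = insertelement <2 x i16> undef, i16 %lo, i32 0
  %v1 = insertelement <2 x i16> %v0, i16 0, i32 1
  ret <2 x i16> %v1
}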
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a24394cdf795..e2c5e44e5408 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1048,25 +1048,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
- if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
- MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
- MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
- if (Src0->isImm() && Src0->getImm() == 0) {
- // v_lshl_or_b32 0, X, Y -> copy Y
- // v_lshl_or_b32 0, X, K -> v_mov_b32 K
- // v_lshl_add_b32 0, X, Y -> copy Y
- // v_lshl_add_b32 0, X, K -> v_mov_b32 K
- // v_and_or_b32 0, X, Y -> copy Y
- // v_and_or_b32 0, X, K -> v_mov_b32 K
- bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
- MI->RemoveOperand(Src1Idx);
- MI->RemoveOperand(Src0Idx);
-
- MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
- return true;
- }
- }
-
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 51918e3cde94..3215575a0fed 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2046,6 +2046,16 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
+def : GCNPat <
+ (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
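As a rough illustration of what the two added patterns select (assuming the
scalar-register path shown above; the actual register numbers depend on
register allocation), packing a 16-bit value with a zero high half reduces to
a mask that keeps the low 16 bits and clears the high 16:

  s_mov_b32 s1, 0xffff        ; mask for the low half
  s_and_b32 s0, s1, s0        ; low 16 bits = $src1, high 16 bits = 0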
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index c8adce6fed38..8854c4146d42 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -422,64 +422,6 @@ body: |
S_ENDPGM 0, implicit $vcc
...
----
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %2
-
-name: constant_fold_lshl_or_reg0_immreg_reg
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %2
-
-name: constant_fold_lshl_or_reg0_immreg_imm
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %3
-
-name: constant_fold_lshl_or_reg0_immreg_immreg
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
---
# GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}}
# GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec
@@ -820,152 +762,6 @@ body: |
...
----
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_reg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_imm
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_immreg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
----
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_reg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_imm
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_immreg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
# This used to incorrectly interpret V_MOV_B32_sdwa as being a move
# immediate, and interpreting the src0_modifiers field as a
# materialized immediate.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index c6af4baee650..74c8b5464902 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -718,11 +718,10 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX9: s_waitcnt
-; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
-; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64