[llvm] 892ef2e - [AMDGPU] More codegen patterns for v2i16/v2f16 build_vector
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 02:41:49 PDT 2020
Author: Jay Foad
Date: 2020-09-22T10:41:38+01:00
New Revision: 892ef2e3c0b60656a95d0d9e9f458b73238b21b7
URL: https://github.com/llvm/llvm-project/commit/892ef2e3c0b60656a95d0d9e9f458b73238b21b7
DIFF: https://github.com/llvm/llvm-project/commit/892ef2e3c0b60656a95d0d9e9f458b73238b21b7.diff
LOG: [AMDGPU] More codegen patterns for v2i16/v2f16 build_vector
It's simpler to handle these build_vector cases with instruction selection
patterns at codegen time than to do ad-hoc constant folding of machine
instructions in SIFoldOperands.
Differential Revision: https://reviews.llvm.org/D88028
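For context, here is a minimal IR sketch of the kind of build_vector the new
patterns match (illustrative only, not taken from the commit's tests; the
function name is made up and an amdgcn gfx9 target is assumed): a <2 x i16>
whose high element is the constant zero. With the new patterns this can be
selected directly to a single masked AND, instead of leaving a
v_lshl_or_b32/v_and_or_b32 with a zero operand for SIFoldOperands to clean up.

define <2 x i16> @build_v2i16_zero_hi(i16 %lo) {
  ; element 0 = %lo, element 1 = 0 -> matches the new (build_vector $src, 0) patterns
  %v0 = insertelement <2 x i16> undef, i16 %lo, i32 0
  %v1 = insertelement <2 x i16> %v0, i16 0, i32 1
  ret <2 x i16> %v1
}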
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a24394cdf795..e2c5e44e5408 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1048,25 +1048,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
- if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
- MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
- MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
- if (Src0->isImm() && Src0->getImm() == 0) {
- // v_lshl_or_b32 0, X, Y -> copy Y
- // v_lshl_or_b32 0, X, K -> v_mov_b32 K
- // v_lshl_add_b32 0, X, Y -> copy Y
- // v_lshl_add_b32 0, X, K -> v_mov_b32 K
- // v_and_or_b32 0, X, Y -> copy Y
- // v_and_or_b32 0, X, K -> v_mov_b32 K
- bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
- MI->RemoveOperand(Src1Idx);
- MI->RemoveOperand(Src0Idx);
-
- MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
- return true;
- }
- }
-
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 51918e3cde94..3215575a0fed 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2046,6 +2046,16 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
+def : GCNPat <
+ (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+>;
+
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
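As a rough illustration of what the two added patterns select (assuming the
scalar-register path shown above; the actual register numbers depend on
register allocation), packing a 16-bit value with a zero high half reduces to
a mask that keeps the low 16 bits and clears the high 16:

  s_mov_b32 s1, 0xffff        ; mask for the low half
  s_and_b32 s0, s1, s0        ; low 16 bits = $src1, high 16 bits = 0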
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index c8adce6fed38..8854c4146d42 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -422,64 +422,6 @@ body: |
S_ENDPGM 0, implicit $vcc
...
----
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %2
-
-name: constant_fold_lshl_or_reg0_immreg_reg
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %2
-
-name: constant_fold_lshl_or_reg0_immreg_imm
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM 0, implicit %3
-
-name: constant_fold_lshl_or_reg0_immreg_immreg
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
---
# GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}}
# GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec
@@ -820,152 +762,6 @@ body: |
...
----
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_reg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_imm
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_lshl_add_reg0_immreg_immreg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
----
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}}
-# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_reg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}}
-# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_imm
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec
- S_ENDPGM 0, implicit %2
-
-...
-
----
-
-# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
-# GCN-NEXT: S_ENDPGM
-
-name: constant_fold_and_or_reg0_immreg_immreg
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-
-body: |
- bb.0:
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
- %3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec
- S_ENDPGM 0, implicit %3
-
-...
-
# This used to incorrectly interpret V_MOV_B32_sdwa as being a move
# immediate, and interpreting the src0_modifiers field as a
# materialized immediate.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index c6af4baee650..74c8b5464902 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -718,11 +718,10 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX9: s_waitcnt
-; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
-; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64